INFO: Downloading File to /root/PM-DARTS2/...

Succeed: Total num: 37, size: 170,641,359. OK num: 37(download 37 objects).

average speed 177751000(byte/s)

0.963278(s) elapsed
INFO: Downloading succeed.
Network is under initialization...
Network successfully initialized.
WARN: ./requirements.txt not found, skip installing requirements.
Training with a single process on 1 GPUs.
Data processing configuration for current model + dataset:
	input_size: (3, 32, 32)
	interpolation: bilinear
	mean: (0.49139968, 0.48215827, 0.44653124)
	std: (0.24703233, 0.24348505, 0.26158768)
	crop_pct: 1.0
	crop_mode: center

-------------------------------
Learnable parameters
Student: 1.93M
Extra: 0.00M
-------------------------------
Scheduled epochs: 50
p_max: 0.125
search_space = s5
Using downloaded and verified file: /mnt/PM-DARTS2/data/cifar-10-python.tar.gz
Extracting /mnt/PM-DARTS2/data/cifar-10-python.tar.gz to /mnt/PM-DARTS2/data
Train: 0 [   0/390]  Loss: 2.413 (2.41)  Acc@1:  4.6875 ( 4.6875)  Acc@5: 42.1875 (42.1875)LR: 2.500e-02
Train: 0 [  50/390]  Loss: 1.807 (2.00)  Acc@1: 34.3750 (26.3787)  Acc@5: 82.8125 (78.5846)LR: 2.500e-02
Train: 0 [ 100/390]  Loss: 1.772 (1.89)  Acc@1: 39.0625 (30.4765)  Acc@5: 90.6250 (82.2401)LR: 2.500e-02
Train: 0 [ 150/390]  Loss: 1.657 (1.80)  Acc@1: 42.1875 (33.3195)  Acc@5: 93.7500 (84.7061)LR: 2.500e-02
Train: 0 [ 200/390]  Loss: 1.712 (1.73)  Acc@1: 35.9375 (35.4866)  Acc@5: 85.9375 (86.3184)LR: 2.500e-02
Train: 0 [ 250/390]  Loss: 1.697 (1.69)  Acc@1: 40.6250 (37.4875)  Acc@5: 85.9375 (87.4253)LR: 2.500e-02
Train: 0 [ 300/390]  Loss: 1.384 (1.64)  Acc@1: 51.5625 (39.4934)  Acc@5: 95.3125 (88.2216)LR: 2.500e-02
Train: 0 [ 350/390]  Loss: 1.369 (1.60)  Acc@1: 50.0000 (40.8075)  Acc@5: 93.7500 (88.9646)LR: 2.500e-02
Train: 0 [ 390/390]  Loss: 1.392 (1.57)  Acc@1: 52.5000 (41.8640)  Acc@5: 95.0000 (89.4320)LR: 2.500e-02
train_acc 41.864000
Valid: 0 [   0/390]  Loss: 1.219 (1.22)  Acc@1: 46.8750 (46.8750)  Acc@5: 96.8750 (96.8750)
Valid: 0 [  50/390]  Loss: 1.563 (1.36)  Acc@1: 42.1875 (50.0306)  Acc@5: 89.0625 (94.1176)
Valid: 0 [ 100/390]  Loss: 1.759 (1.35)  Acc@1: 35.9375 (50.6343)  Acc@5: 90.6250 (93.7809)
Valid: 0 [ 150/390]  Loss: 1.310 (1.36)  Acc@1: 48.4375 (50.6623)  Acc@5: 98.4375 (93.6051)
Valid: 0 [ 200/390]  Loss: 1.212 (1.36)  Acc@1: 57.8125 (50.3265)  Acc@5: 93.7500 (93.5401)
Valid: 0 [ 250/390]  Loss: 1.319 (1.36)  Acc@1: 48.4375 (50.4669)  Acc@5: 93.7500 (93.4885)
Valid: 0 [ 300/390]  Loss: 1.452 (1.36)  Acc@1: 53.1250 (50.4931)  Acc@5: 82.8125 (93.5995)
Valid: 0 [ 350/390]  Loss: 1.388 (1.37)  Acc@1: 56.2500 (50.5297)  Acc@5: 93.7500 (93.5452)
Valid: 0 [ 390/390]  Loss: 1.316 (1.36)  Acc@1: 45.0000 (50.6120)  Acc@5: 97.5000 (93.6640)
valid_acc 50.612000
epoch = 0   
 genotype = Genotype(normal=[('dil_conv_5x5', 1), ('max_pool_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 1), ('dil_conv_5x5', 0), ('dil_conv_5x5', 2), ('dil_conv_3x3', 4), ('dil_conv_5x5', 2)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 3), ('dil_conv_5x5', 2), ('sep_conv_5x5', 2), ('max_pool_3x3', 0)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1253, 0.1271, 0.1207, 0.1238, 0.1266, 0.1261, 0.1259, 0.1244],
        [0.1286, 0.1220, 0.1193, 0.1217, 0.1265, 0.1276, 0.1267, 0.1277],
        [0.1260, 0.1256, 0.1211, 0.1238, 0.1259, 0.1265, 0.1236, 0.1275],
        [0.1295, 0.1216, 0.1194, 0.1220, 0.1280, 0.1260, 0.1260, 0.1275],
        [0.1285, 0.1209, 0.1188, 0.1222, 0.1267, 0.1290, 0.1273, 0.1266],
        [0.1261, 0.1252, 0.1193, 0.1215, 0.1279, 0.1266, 0.1253, 0.1281],
        [0.1288, 0.1224, 0.1200, 0.1225, 0.1274, 0.1270, 0.1252, 0.1268],
        [0.1292, 0.1206, 0.1186, 0.1218, 0.1280, 0.1264, 0.1272, 0.1281],
        [0.1299, 0.1205, 0.1192, 0.1215, 0.1280, 0.1262, 0.1272, 0.1276],
        [0.1278, 0.1256, 0.1201, 0.1222, 0.1256, 0.1281, 0.1245, 0.1261],
        [0.1293, 0.1216, 0.1192, 0.1213, 0.1281, 0.1272, 0.1255, 0.1278],
        [0.1296, 0.1201, 0.1187, 0.1205, 0.1267, 0.1277, 0.1282, 0.1286],
        [0.1303, 0.1203, 0.1189, 0.1205, 0.1285, 0.1281, 0.1268, 0.1265],
        [0.1301, 0.1196, 0.1183, 0.1202, 0.1264, 0.1278, 0.1293, 0.1285]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1231, 0.1275, 0.1249, 0.1244, 0.1232, 0.1274, 0.1236, 0.1260],
        [0.1270, 0.1245, 0.1239, 0.1255, 0.1265, 0.1251, 0.1228, 0.1245],
        [0.1235, 0.1286, 0.1259, 0.1236, 0.1269, 0.1245, 0.1231, 0.1240],
        [0.1253, 0.1235, 0.1231, 0.1254, 0.1247, 0.1254, 0.1260, 0.1265],
        [0.1257, 0.1234, 0.1211, 0.1246, 0.1266, 0.1260, 0.1256, 0.1269],
        [0.1226, 0.1265, 0.1244, 0.1250, 0.1242, 0.1257, 0.1258, 0.1258],
        [0.1266, 0.1235, 0.1229, 0.1261, 0.1252, 0.1263, 0.1250, 0.1244],
        [0.1272, 0.1241, 0.1210, 0.1256, 0.1231, 0.1259, 0.1264, 0.1267],
        [0.1258, 0.1231, 0.1218, 0.1251, 0.1242, 0.1280, 0.1253, 0.1267],
        [0.1226, 0.1284, 0.1256, 0.1242, 0.1240, 0.1257, 0.1255, 0.1240],
        [0.1261, 0.1249, 0.1242, 0.1242, 0.1237, 0.1273, 0.1265, 0.1231],
        [0.1253, 0.1246, 0.1220, 0.1252, 0.1235, 0.1286, 0.1255, 0.1253],
        [0.1251, 0.1237, 0.1225, 0.1252, 0.1258, 0.1270, 0.1243, 0.1264],
        [0.1263, 0.1235, 0.1220, 0.1248, 0.1253, 0.1254, 0.1265, 0.1263]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 1 [   0/390]  Loss: 1.249 (1.25)  Acc@1: 54.6875 (54.6875)  Acc@5: 98.4375 (98.4375)LR: 2.498e-02
Train: 1 [  50/390]  Loss: 1.052 (1.26)  Acc@1: 60.9375 (54.6875)  Acc@5: 96.8750 (94.6998)LR: 2.498e-02
Train: 1 [ 100/390]  Loss: 1.318 (1.23)  Acc@1: 43.7500 (55.1516)  Acc@5: 95.3125 (95.2197)LR: 2.498e-02
Train: 1 [ 150/390]  Loss: 1.253 (1.22)  Acc@1: 57.8125 (55.6395)  Acc@5: 90.6250 (95.1987)LR: 2.498e-02
Train: 1 [ 200/390]  Loss: 0.9577 (1.21)  Acc@1: 62.5000 (56.1178)  Acc@5: 100.0000 (95.2736)LR: 2.498e-02
Train: 1 [ 250/390]  Loss: 0.8873 (1.20)  Acc@1: 65.6250 (56.9161)  Acc@5: 96.8750 (95.4183)LR: 2.498e-02
Train: 1 [ 300/390]  Loss: 1.046 (1.19)  Acc@1: 60.9375 (57.3297)  Acc@5: 98.4375 (95.3748)LR: 2.498e-02
Train: 1 [ 350/390]  Loss: 0.8998 (1.17)  Acc@1: 65.6250 (58.0173)  Acc@5: 96.8750 (95.4327)LR: 2.498e-02
Train: 1 [ 390/390]  Loss: 1.290 (1.15)  Acc@1: 55.0000 (58.6560)  Acc@5: 97.5000 (95.5600)LR: 2.498e-02
train_acc 58.656000
Valid: 1 [   0/390]  Loss: 0.8211 (0.821)  Acc@1: 73.4375 (73.4375)  Acc@5: 96.8750 (96.8750)
Valid: 1 [  50/390]  Loss: 1.079 (1.07)  Acc@1: 60.9375 (62.4081)  Acc@5: 98.4375 (96.3235)
Valid: 1 [ 100/390]  Loss: 1.121 (1.07)  Acc@1: 62.5000 (62.1132)  Acc@5: 93.7500 (96.1479)
Valid: 1 [ 150/390]  Loss: 1.004 (1.06)  Acc@1: 67.1875 (62.2930)  Acc@5: 96.8750 (96.4507)
Valid: 1 [ 200/390]  Loss: 0.9331 (1.06)  Acc@1: 68.7500 (62.2435)  Acc@5: 98.4375 (96.4241)
Valid: 1 [ 250/390]  Loss: 0.9212 (1.05)  Acc@1: 62.5000 (62.3319)  Acc@5: 96.8750 (96.4081)
Valid: 1 [ 300/390]  Loss: 1.175 (1.05)  Acc@1: 57.8125 (62.3391)  Acc@5: 96.8750 (96.4753)
Valid: 1 [ 350/390]  Loss: 1.177 (1.06)  Acc@1: 54.6875 (62.4021)  Acc@5: 93.7500 (96.4298)
Valid: 1 [ 390/390]  Loss: 0.9051 (1.06)  Acc@1: 62.5000 (62.3800)  Acc@5: 97.5000 (96.4200)
valid_acc 62.380000
epoch = 1   
 genotype = Genotype(normal=[('max_pool_3x3', 0), ('dil_conv_5x5', 1), ('sep_conv_5x5', 2), ('dil_conv_5x5', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 4), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 3), ('dil_conv_5x5', 2), ('sep_conv_5x5', 2), ('dil_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1245, 0.1301, 0.1160, 0.1223, 0.1289, 0.1278, 0.1266, 0.1238],
        [0.1315, 0.1212, 0.1144, 0.1193, 0.1269, 0.1293, 0.1275, 0.1300],
        [0.1252, 0.1283, 0.1168, 0.1217, 0.1291, 0.1280, 0.1245, 0.1264],
        [0.1331, 0.1201, 0.1146, 0.1191, 0.1279, 0.1279, 0.1277, 0.1296],
        [0.1296, 0.1194, 0.1141, 0.1192, 0.1271, 0.1322, 0.1290, 0.1294],
        [0.1263, 0.1282, 0.1143, 0.1185, 0.1291, 0.1295, 0.1247, 0.1293],
        [0.1311, 0.1214, 0.1154, 0.1202, 0.1280, 0.1299, 0.1251, 0.1290],
        [0.1309, 0.1191, 0.1137, 0.1192, 0.1297, 0.1275, 0.1296, 0.1303],
        [0.1328, 0.1188, 0.1150, 0.1195, 0.1288, 0.1271, 0.1284, 0.1296],
        [0.1288, 0.1281, 0.1156, 0.1197, 0.1266, 0.1291, 0.1250, 0.1270],
        [0.1320, 0.1203, 0.1146, 0.1188, 0.1309, 0.1284, 0.1255, 0.1295],
        [0.1315, 0.1183, 0.1143, 0.1176, 0.1281, 0.1293, 0.1306, 0.1303],
        [0.1332, 0.1189, 0.1152, 0.1184, 0.1289, 0.1285, 0.1277, 0.1291],
        [0.1322, 0.1167, 0.1138, 0.1169, 0.1270, 0.1308, 0.1323, 0.1303]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1221, 0.1295, 0.1246, 0.1228, 0.1233, 0.1282, 0.1227, 0.1267],
        [0.1278, 0.1241, 0.1212, 0.1260, 0.1260, 0.1272, 0.1233, 0.1244],
        [0.1227, 0.1305, 0.1254, 0.1237, 0.1266, 0.1242, 0.1224, 0.1247],
        [0.1254, 0.1226, 0.1198, 0.1249, 0.1245, 0.1257, 0.1279, 0.1291],
        [0.1259, 0.1231, 0.1169, 0.1236, 0.1287, 0.1260, 0.1258, 0.1299],
        [0.1225, 0.1277, 0.1238, 0.1240, 0.1248, 0.1256, 0.1248, 0.1268],
        [0.1278, 0.1221, 0.1192, 0.1250, 0.1265, 0.1275, 0.1266, 0.1253],
        [0.1268, 0.1247, 0.1174, 0.1250, 0.1234, 0.1289, 0.1246, 0.1292],
        [0.1251, 0.1235, 0.1190, 0.1248, 0.1248, 0.1311, 0.1239, 0.1277],
        [0.1221, 0.1299, 0.1247, 0.1222, 0.1261, 0.1258, 0.1259, 0.1233],
        [0.1286, 0.1242, 0.1210, 0.1250, 0.1245, 0.1271, 0.1256, 0.1241],
        [0.1261, 0.1241, 0.1174, 0.1238, 0.1236, 0.1308, 0.1259, 0.1283],
        [0.1239, 0.1237, 0.1190, 0.1237, 0.1263, 0.1295, 0.1238, 0.1299],
        [0.1281, 0.1234, 0.1179, 0.1230, 0.1256, 0.1279, 0.1257, 0.1284]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 2 [   0/390]  Loss: 1.066 (1.07)  Acc@1: 65.6250 (65.6250)  Acc@5: 95.3125 (95.3125)LR: 2.491e-02
Train: 2 [  50/390]  Loss: 0.9858 (1.03)  Acc@1: 64.0625 (62.4387)  Acc@5: 100.0000 (96.4767)LR: 2.491e-02
Train: 2 [ 100/390]  Loss: 1.135 (1.01)  Acc@1: 60.9375 (63.6757)  Acc@5: 96.8750 (96.4573)LR: 2.491e-02
Train: 2 [ 150/390]  Loss: 0.8510 (1.01)  Acc@1: 70.3125 (63.7107)  Acc@5: 95.3125 (96.6474)LR: 2.491e-02
Train: 2 [ 200/390]  Loss: 1.085 (0.999)  Acc@1: 57.8125 (64.3113)  Acc@5: 100.0000 (96.7195)LR: 2.491e-02
Train: 2 [ 250/390]  Loss: 0.8428 (0.990)  Acc@1: 68.7500 (64.8718)  Acc@5: 96.8750 (96.7692)LR: 2.491e-02
Train: 2 [ 300/390]  Loss: 0.9560 (0.973)  Acc@1: 68.7500 (65.4329)  Acc@5: 98.4375 (96.9113)LR: 2.491e-02
Train: 2 [ 350/390]  Loss: 1.059 (0.971)  Acc@1: 59.3750 (65.4736)  Acc@5: 96.8750 (96.9462)LR: 2.491e-02
Train: 2 [ 390/390]  Loss: 1.110 (0.966)  Acc@1: 60.0000 (65.6240)  Acc@5: 97.5000 (96.9800)LR: 2.491e-02
train_acc 65.624000
Valid: 2 [   0/390]  Loss: 1.195 (1.19)  Acc@1: 56.2500 (56.2500)  Acc@5: 93.7500 (93.7500)
Valid: 2 [  50/390]  Loss: 0.9134 (1.03)  Acc@1: 67.1875 (63.4498)  Acc@5: 96.8750 (96.6299)
Valid: 2 [ 100/390]  Loss: 0.8951 (1.04)  Acc@1: 70.3125 (63.3663)  Acc@5: 96.8750 (96.5501)
Valid: 2 [ 150/390]  Loss: 1.019 (1.04)  Acc@1: 65.6250 (63.0588)  Acc@5: 93.7500 (96.5956)
Valid: 2 [ 200/390]  Loss: 0.9929 (1.04)  Acc@1: 65.6250 (62.9820)  Acc@5: 96.8750 (96.6107)
Valid: 2 [ 250/390]  Loss: 1.066 (1.05)  Acc@1: 65.6250 (62.6494)  Acc@5: 93.7500 (96.5637)
Valid: 2 [ 300/390]  Loss: 0.8754 (1.05)  Acc@1: 70.3125 (62.5208)  Acc@5: 100.0000 (96.6206)
Valid: 2 [ 350/390]  Loss: 0.9030 (1.05)  Acc@1: 68.7500 (62.2908)  Acc@5: 98.4375 (96.6391)
Valid: 2 [ 390/390]  Loss: 0.8562 (1.05)  Acc@1: 57.5000 (62.1960)  Acc@5: 100.0000 (96.6240)
valid_acc 62.196000
epoch = 2   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 2), ('sep_conv_5x5', 0), ('dil_conv_5x5', 3), ('sep_conv_5x5', 1), ('dil_conv_3x3', 4), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 3), ('sep_conv_5x5', 2), ('dil_conv_5x5', 3), ('sep_conv_5x5', 4)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1246, 0.1304, 0.1132, 0.1216, 0.1327, 0.1290, 0.1253, 0.1232],
        [0.1333, 0.1199, 0.1106, 0.1179, 0.1269, 0.1312, 0.1291, 0.1310],
        [0.1254, 0.1284, 0.1135, 0.1201, 0.1315, 0.1316, 0.1240, 0.1254],
        [0.1350, 0.1187, 0.1109, 0.1169, 0.1285, 0.1289, 0.1300, 0.1312],
        [0.1314, 0.1171, 0.1100, 0.1169, 0.1277, 0.1353, 0.1296, 0.1321],
        [0.1264, 0.1286, 0.1109, 0.1168, 0.1309, 0.1315, 0.1254, 0.1295],
        [0.1325, 0.1204, 0.1119, 0.1190, 0.1290, 0.1318, 0.1256, 0.1298],
        [0.1335, 0.1165, 0.1092, 0.1173, 0.1316, 0.1289, 0.1317, 0.1314],
        [0.1360, 0.1159, 0.1106, 0.1174, 0.1298, 0.1287, 0.1290, 0.1325],
        [0.1305, 0.1283, 0.1131, 0.1186, 0.1285, 0.1297, 0.1238, 0.1276],
        [0.1333, 0.1189, 0.1108, 0.1168, 0.1334, 0.1296, 0.1277, 0.1295],
        [0.1344, 0.1162, 0.1103, 0.1162, 0.1291, 0.1307, 0.1312, 0.1318],
        [0.1353, 0.1171, 0.1115, 0.1162, 0.1297, 0.1307, 0.1282, 0.1314],
        [0.1350, 0.1141, 0.1099, 0.1142, 0.1281, 0.1327, 0.1343, 0.1316]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1212, 0.1320, 0.1251, 0.1220, 0.1236, 0.1279, 0.1211, 0.1272],
        [0.1282, 0.1246, 0.1196, 0.1255, 0.1261, 0.1296, 0.1229, 0.1235],
        [0.1221, 0.1330, 0.1263, 0.1241, 0.1257, 0.1245, 0.1207, 0.1236],
        [0.1261, 0.1237, 0.1186, 0.1241, 0.1253, 0.1256, 0.1278, 0.1288],
        [0.1258, 0.1227, 0.1141, 0.1245, 0.1302, 0.1249, 0.1265, 0.1312],
        [0.1207, 0.1285, 0.1232, 0.1226, 0.1250, 0.1258, 0.1262, 0.1279],
        [0.1286, 0.1232, 0.1181, 0.1242, 0.1254, 0.1273, 0.1270, 0.1262],
        [0.1268, 0.1240, 0.1139, 0.1246, 0.1254, 0.1300, 0.1267, 0.1287],
        [0.1260, 0.1225, 0.1156, 0.1245, 0.1241, 0.1335, 0.1247, 0.1292],
        [0.1205, 0.1306, 0.1238, 0.1211, 0.1286, 0.1254, 0.1276, 0.1223],
        [0.1289, 0.1248, 0.1193, 0.1246, 0.1249, 0.1289, 0.1249, 0.1238],
        [0.1265, 0.1230, 0.1136, 0.1244, 0.1257, 0.1298, 0.1267, 0.1302],
        [0.1252, 0.1218, 0.1139, 0.1215, 0.1274, 0.1314, 0.1261, 0.1327],
        [0.1315, 0.1199, 0.1128, 0.1205, 0.1273, 0.1327, 0.1256, 0.1297]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 3 [   0/390]  Loss: 0.9323 (0.932)  Acc@1: 65.6250 (65.6250)  Acc@5: 96.8750 (96.8750)LR: 2.479e-02
Train: 3 [  50/390]  Loss: 0.7662 (0.878)  Acc@1: 71.8750 (69.0870)  Acc@5: 95.3125 (97.5184)LR: 2.479e-02
Train: 3 [ 100/390]  Loss: 1.209 (0.891)  Acc@1: 60.9375 (68.6881)  Acc@5: 95.3125 (97.4165)LR: 2.479e-02
Train: 3 [ 150/390]  Loss: 0.7471 (0.875)  Acc@1: 73.4375 (69.2053)  Acc@5: 96.8750 (97.4959)LR: 2.479e-02
Train: 3 [ 200/390]  Loss: 0.6077 (0.872)  Acc@1: 79.6875 (69.3874)  Acc@5: 98.4375 (97.4502)LR: 2.479e-02
Train: 3 [ 250/390]  Loss: 0.8634 (0.853)  Acc@1: 75.0000 (70.0137)  Acc@5: 98.4375 (97.5971)LR: 2.479e-02
Train: 3 [ 300/390]  Loss: 0.6496 (0.845)  Acc@1: 71.8750 (70.4942)  Acc@5: 98.4375 (97.6069)LR: 2.479e-02
Train: 3 [ 350/390]  Loss: 1.088 (0.838)  Acc@1: 62.5000 (70.7265)  Acc@5: 96.8750 (97.6629)LR: 2.479e-02
Train: 3 [ 390/390]  Loss: 0.7897 (0.834)  Acc@1: 70.0000 (70.8200)  Acc@5: 97.5000 (97.7000)LR: 2.479e-02
train_acc 70.820000
Valid: 3 [   0/390]  Loss: 0.7635 (0.763)  Acc@1: 71.8750 (71.8750)  Acc@5: 100.0000 (100.0000)
Valid: 3 [  50/390]  Loss: 0.8195 (0.816)  Acc@1: 73.4375 (71.3848)  Acc@5: 96.8750 (97.6716)
Valid: 3 [ 100/390]  Loss: 0.7897 (0.833)  Acc@1: 71.8750 (70.6374)  Acc@5: 96.8750 (97.7259)
Valid: 3 [ 150/390]  Loss: 0.7926 (0.836)  Acc@1: 67.1875 (70.7781)  Acc@5: 95.3125 (97.7546)
Valid: 3 [ 200/390]  Loss: 0.8551 (0.837)  Acc@1: 65.6250 (70.7556)  Acc@5: 95.3125 (97.6990)
Valid: 3 [ 250/390]  Loss: 0.7823 (0.834)  Acc@1: 78.1250 (70.7981)  Acc@5: 100.0000 (97.7590)
Valid: 3 [ 300/390]  Loss: 0.7134 (0.838)  Acc@1: 76.5625 (70.6863)  Acc@5: 95.3125 (97.7263)
Valid: 3 [ 350/390]  Loss: 0.7867 (0.838)  Acc@1: 78.1250 (70.8244)  Acc@5: 98.4375 (97.7030)
Valid: 3 [ 390/390]  Loss: 0.7361 (0.837)  Acc@1: 67.5000 (70.9120)  Acc@5: 100.0000 (97.6920)
valid_acc 70.912000
epoch = 3   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 2), ('sep_conv_5x5', 0), ('dil_conv_5x5', 3), ('sep_conv_5x5', 0), ('dil_conv_3x3', 4), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('max_pool_3x3', 0), ('sep_conv_3x3', 2), ('sep_conv_5x5', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('dil_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1259, 0.1292, 0.1104, 0.1203, 0.1358, 0.1298, 0.1251, 0.1235],
        [0.1332, 0.1177, 0.1077, 0.1165, 0.1297, 0.1346, 0.1290, 0.1316],
        [0.1267, 0.1269, 0.1103, 0.1183, 0.1340, 0.1346, 0.1245, 0.1247],
        [0.1364, 0.1167, 0.1083, 0.1154, 0.1290, 0.1285, 0.1339, 0.1319],
        [0.1326, 0.1142, 0.1064, 0.1150, 0.1280, 0.1382, 0.1320, 0.1336],
        [0.1264, 0.1263, 0.1069, 0.1138, 0.1330, 0.1350, 0.1268, 0.1318],
        [0.1355, 0.1182, 0.1087, 0.1174, 0.1309, 0.1338, 0.1253, 0.1302],
        [0.1360, 0.1126, 0.1043, 0.1137, 0.1349, 0.1314, 0.1347, 0.1323],
        [0.1385, 0.1122, 0.1062, 0.1141, 0.1335, 0.1308, 0.1289, 0.1357],
        [0.1330, 0.1256, 0.1097, 0.1163, 0.1303, 0.1313, 0.1245, 0.1292],
        [0.1360, 0.1160, 0.1068, 0.1139, 0.1355, 0.1331, 0.1293, 0.1294],
        [0.1382, 0.1124, 0.1059, 0.1138, 0.1306, 0.1319, 0.1333, 0.1339],
        [0.1379, 0.1134, 0.1068, 0.1124, 0.1311, 0.1347, 0.1303, 0.1334],
        [0.1391, 0.1102, 0.1056, 0.1109, 0.1296, 0.1337, 0.1364, 0.1345]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1207, 0.1342, 0.1254, 0.1210, 0.1245, 0.1277, 0.1197, 0.1269],
        [0.1288, 0.1250, 0.1182, 0.1255, 0.1271, 0.1291, 0.1229, 0.1235],
        [0.1210, 0.1349, 0.1268, 0.1230, 0.1280, 0.1222, 0.1203, 0.1238],
        [0.1267, 0.1235, 0.1172, 0.1256, 0.1244, 0.1259, 0.1277, 0.1290],
        [0.1267, 0.1230, 0.1120, 0.1248, 0.1317, 0.1239, 0.1264, 0.1314],
        [0.1202, 0.1292, 0.1229, 0.1204, 0.1248, 0.1274, 0.1262, 0.1289],
        [0.1290, 0.1239, 0.1175, 0.1231, 0.1255, 0.1286, 0.1270, 0.1254],
        [0.1253, 0.1229, 0.1112, 0.1232, 0.1278, 0.1328, 0.1272, 0.1295],
        [0.1267, 0.1224, 0.1131, 0.1244, 0.1244, 0.1329, 0.1266, 0.1296],
        [0.1199, 0.1330, 0.1253, 0.1214, 0.1285, 0.1233, 0.1279, 0.1206],
        [0.1299, 0.1255, 0.1191, 0.1238, 0.1263, 0.1294, 0.1230, 0.1230],
        [0.1265, 0.1221, 0.1114, 0.1240, 0.1293, 0.1299, 0.1264, 0.1304],
        [0.1251, 0.1205, 0.1105, 0.1201, 0.1276, 0.1337, 0.1282, 0.1342],
        [0.1333, 0.1185, 0.1098, 0.1197, 0.1270, 0.1368, 0.1238, 0.1311]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 4 [   0/390]  Loss: 0.9775 (0.978)  Acc@1: 71.8750 (71.8750)  Acc@5: 96.8750 (96.8750)LR: 2.462e-02
Train: 4 [  50/390]  Loss: 0.8278 (0.768)  Acc@1: 76.5625 (73.5600)  Acc@5: 98.4375 (98.3762)LR: 2.462e-02
Train: 4 [ 100/390]  Loss: 0.6862 (0.757)  Acc@1: 78.1250 (73.5922)  Acc@5: 98.4375 (98.3137)LR: 2.462e-02
Train: 4 [ 150/390]  Loss: 0.5626 (0.773)  Acc@1: 78.1250 (73.1788)  Acc@5: 100.0000 (98.2512)LR: 2.462e-02
Train: 4 [ 200/390]  Loss: 0.7575 (0.779)  Acc@1: 71.8750 (72.8001)  Acc@5: 100.0000 (98.3053)LR: 2.462e-02
Train: 4 [ 250/390]  Loss: 0.6049 (0.767)  Acc@1: 81.2500 (73.0951)  Acc@5: 98.4375 (98.3005)LR: 2.462e-02
Train: 4 [ 300/390]  Loss: 0.7216 (0.768)  Acc@1: 75.0000 (72.9444)  Acc@5: 93.7500 (98.3596)LR: 2.462e-02
Train: 4 [ 350/390]  Loss: 0.6169 (0.763)  Acc@1: 78.1250 (73.2372)  Acc@5: 100.0000 (98.3796)LR: 2.462e-02
Train: 4 [ 390/390]  Loss: 0.6277 (0.763)  Acc@1: 70.0000 (73.2480)  Acc@5: 100.0000 (98.4000)LR: 2.462e-02
train_acc 73.248000
Valid: 4 [   0/390]  Loss: 0.6698 (0.670)  Acc@1: 78.1250 (78.1250)  Acc@5: 100.0000 (100.0000)
Valid: 4 [  50/390]  Loss: 0.6082 (0.733)  Acc@1: 81.2500 (74.1115)  Acc@5: 100.0000 (98.4375)
Valid: 4 [ 100/390]  Loss: 0.6149 (0.750)  Acc@1: 79.6875 (73.2828)  Acc@5: 96.8750 (98.2673)
Valid: 4 [ 150/390]  Loss: 0.6854 (0.771)  Acc@1: 82.8125 (72.8373)  Acc@5: 96.8750 (98.0546)
Valid: 4 [ 200/390]  Loss: 0.6487 (0.760)  Acc@1: 76.5625 (73.2587)  Acc@5: 100.0000 (98.1499)
Valid: 4 [ 250/390]  Loss: 0.7397 (0.757)  Acc@1: 71.8750 (73.2694)  Acc@5: 98.4375 (98.2134)
Valid: 4 [ 300/390]  Loss: 0.4823 (0.762)  Acc@1: 82.8125 (73.1157)  Acc@5: 100.0000 (98.2195)
Valid: 4 [ 350/390]  Loss: 0.8824 (0.762)  Acc@1: 71.8750 (73.1437)  Acc@5: 96.8750 (98.1838)
Valid: 4 [ 390/390]  Loss: 0.6905 (0.761)  Acc@1: 70.0000 (73.2600)  Acc@5: 97.5000 (98.1960)
valid_acc 73.260000
epoch = 4   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 2), ('dil_conv_3x3', 1), ('sep_conv_5x5', 0), ('dil_conv_5x5', 3), ('dil_conv_3x3', 4), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 2), ('sep_conv_5x5', 3), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1267, 0.1291, 0.1076, 0.1191, 0.1410, 0.1285, 0.1247, 0.1234],
        [0.1348, 0.1170, 0.1052, 0.1160, 0.1306, 0.1362, 0.1284, 0.1318],
        [0.1270, 0.1265, 0.1071, 0.1165, 0.1370, 0.1367, 0.1254, 0.1238],
        [0.1385, 0.1150, 0.1054, 0.1141, 0.1285, 0.1289, 0.1375, 0.1319],
        [0.1348, 0.1116, 0.1023, 0.1128, 0.1297, 0.1407, 0.1337, 0.1344],
        [0.1267, 0.1255, 0.1033, 0.1112, 0.1343, 0.1380, 0.1280, 0.1330],
        [0.1371, 0.1171, 0.1055, 0.1158, 0.1323, 0.1363, 0.1243, 0.1315],
        [0.1408, 0.1097, 0.0996, 0.1113, 0.1348, 0.1324, 0.1372, 0.1343],
        [0.1420, 0.1094, 0.1017, 0.1106, 0.1348, 0.1337, 0.1303, 0.1375],
        [0.1345, 0.1254, 0.1067, 0.1145, 0.1318, 0.1330, 0.1242, 0.1300],
        [0.1391, 0.1148, 0.1040, 0.1128, 0.1372, 0.1323, 0.1311, 0.1288],
        [0.1421, 0.1094, 0.1016, 0.1118, 0.1328, 0.1328, 0.1345, 0.1352],
        [0.1415, 0.1111, 0.1030, 0.1100, 0.1316, 0.1365, 0.1308, 0.1354],
        [0.1428, 0.1077, 0.1021, 0.1086, 0.1303, 0.1354, 0.1380, 0.1351]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1201, 0.1356, 0.1256, 0.1208, 0.1250, 0.1277, 0.1191, 0.1262],
        [0.1292, 0.1251, 0.1168, 0.1260, 0.1261, 0.1308, 0.1225, 0.1237],
        [0.1199, 0.1367, 0.1276, 0.1227, 0.1293, 0.1207, 0.1192, 0.1240],
        [0.1268, 0.1236, 0.1159, 0.1255, 0.1232, 0.1269, 0.1292, 0.1288],
        [0.1271, 0.1226, 0.1099, 0.1249, 0.1321, 0.1231, 0.1269, 0.1334],
        [0.1192, 0.1305, 0.1242, 0.1178, 0.1254, 0.1284, 0.1252, 0.1293],
        [0.1299, 0.1243, 0.1177, 0.1218, 0.1254, 0.1291, 0.1268, 0.1250],
        [0.1245, 0.1219, 0.1095, 0.1235, 0.1275, 0.1349, 0.1287, 0.1295],
        [0.1277, 0.1215, 0.1119, 0.1255, 0.1235, 0.1330, 0.1276, 0.1295],
        [0.1185, 0.1334, 0.1259, 0.1221, 0.1294, 0.1230, 0.1280, 0.1197],
        [0.1301, 0.1253, 0.1183, 0.1246, 0.1272, 0.1287, 0.1225, 0.1234],
        [0.1258, 0.1201, 0.1086, 0.1230, 0.1306, 0.1329, 0.1275, 0.1315],
        [0.1268, 0.1190, 0.1079, 0.1196, 0.1274, 0.1359, 0.1296, 0.1337],
        [0.1365, 0.1159, 0.1063, 0.1183, 0.1280, 0.1406, 0.1222, 0.1320]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 5 [   0/390]  Loss: 0.6584 (0.658)  Acc@1: 68.7500 (68.7500)  Acc@5: 100.0000 (100.0000)LR: 2.441e-02
Train: 5 [  50/390]  Loss: 0.7635 (0.689)  Acc@1: 71.8750 (75.3983)  Acc@5: 96.8750 (98.5907)LR: 2.441e-02
Train: 5 [ 100/390]  Loss: 0.7222 (0.695)  Acc@1: 71.8750 (75.2475)  Acc@5: 96.8750 (98.5303)LR: 2.441e-02
Train: 5 [ 150/390]  Loss: 0.6538 (0.689)  Acc@1: 85.9375 (75.4553)  Acc@5: 98.4375 (98.6341)LR: 2.441e-02
Train: 5 [ 200/390]  Loss: 0.6031 (0.693)  Acc@1: 75.0000 (75.2643)  Acc@5: 100.0000 (98.6085)LR: 2.441e-02
Train: 5 [ 250/390]  Loss: 0.6517 (0.701)  Acc@1: 81.2500 (74.9751)  Acc@5: 100.0000 (98.5371)LR: 2.441e-02
Train: 5 [ 300/390]  Loss: 0.7502 (0.701)  Acc@1: 68.7500 (75.1402)  Acc@5: 95.3125 (98.5102)LR: 2.441e-02
Train: 5 [ 350/390]  Loss: 0.6240 (0.696)  Acc@1: 75.0000 (75.2804)  Acc@5: 98.4375 (98.5488)LR: 2.441e-02
Train: 5 [ 390/390]  Loss: 1.017 (0.698)  Acc@1: 60.0000 (75.2120)  Acc@5: 97.5000 (98.5200)LR: 2.441e-02
train_acc 75.212000
Valid: 5 [   0/390]  Loss: 0.8443 (0.844)  Acc@1: 73.4375 (73.4375)  Acc@5: 96.8750 (96.8750)
Valid: 5 [  50/390]  Loss: 0.8164 (0.680)  Acc@1: 71.8750 (76.8995)  Acc@5: 95.3125 (98.4681)
Valid: 5 [ 100/390]  Loss: 0.6503 (0.682)  Acc@1: 75.0000 (76.7791)  Acc@5: 98.4375 (98.3137)
Valid: 5 [ 150/390]  Loss: 0.5459 (0.672)  Acc@1: 81.2500 (77.0902)  Acc@5: 98.4375 (98.4478)
Valid: 5 [ 200/390]  Loss: 0.7407 (0.679)  Acc@1: 71.8750 (76.8657)  Acc@5: 96.8750 (98.4686)
Valid: 5 [ 250/390]  Loss: 0.5219 (0.676)  Acc@1: 76.5625 (76.9360)  Acc@5: 100.0000 (98.4500)
Valid: 5 [ 300/390]  Loss: 0.5769 (0.678)  Acc@1: 82.8125 (76.7442)  Acc@5: 100.0000 (98.4583)
Valid: 5 [ 350/390]  Loss: 0.7846 (0.677)  Acc@1: 70.3125 (76.6960)  Acc@5: 96.8750 (98.4420)
Valid: 5 [ 390/390]  Loss: 0.4859 (0.677)  Acc@1: 85.0000 (76.6960)  Acc@5: 100.0000 (98.4320)
valid_acc 76.696000
epoch = 5   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 2), ('sep_conv_5x5', 0), ('sep_conv_5x5', 0), ('dil_conv_5x5', 3), ('dil_conv_3x3', 4), ('sep_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 2), ('sep_conv_5x5', 3), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1276, 0.1265, 0.1039, 0.1166, 0.1448, 0.1306, 0.1254, 0.1245],
        [0.1361, 0.1153, 0.1023, 0.1145, 0.1327, 0.1383, 0.1287, 0.1320],
        [0.1287, 0.1237, 0.1033, 0.1136, 0.1395, 0.1417, 0.1261, 0.1233],
        [0.1398, 0.1135, 0.1032, 0.1134, 0.1293, 0.1284, 0.1398, 0.1326],
        [0.1352, 0.1094, 0.0988, 0.1106, 0.1322, 0.1432, 0.1352, 0.1353],
        [0.1289, 0.1230, 0.0995, 0.1082, 0.1349, 0.1415, 0.1287, 0.1353],
        [0.1383, 0.1152, 0.1023, 0.1139, 0.1342, 0.1380, 0.1247, 0.1333],
        [0.1430, 0.1074, 0.0956, 0.1084, 0.1377, 0.1342, 0.1388, 0.1348],
        [0.1444, 0.1065, 0.0976, 0.1075, 0.1373, 0.1360, 0.1307, 0.1401],
        [0.1387, 0.1227, 0.1031, 0.1120, 0.1322, 0.1349, 0.1248, 0.1316],
        [0.1422, 0.1126, 0.1008, 0.1108, 0.1380, 0.1337, 0.1324, 0.1295],
        [0.1463, 0.1071, 0.0977, 0.1101, 0.1328, 0.1346, 0.1358, 0.1356],
        [0.1451, 0.1086, 0.0992, 0.1074, 0.1326, 0.1387, 0.1313, 0.1371],
        [0.1462, 0.1051, 0.0982, 0.1054, 0.1317, 0.1375, 0.1405, 0.1354]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1208, 0.1361, 0.1250, 0.1206, 0.1248, 0.1277, 0.1186, 0.1266],
        [0.1287, 0.1254, 0.1154, 0.1260, 0.1260, 0.1325, 0.1221, 0.1238],
        [0.1193, 0.1375, 0.1274, 0.1228, 0.1286, 0.1205, 0.1199, 0.1240],
        [0.1264, 0.1237, 0.1147, 0.1259, 0.1239, 0.1268, 0.1298, 0.1287],
        [0.1271, 0.1217, 0.1065, 0.1228, 0.1339, 0.1246, 0.1282, 0.1351],
        [0.1206, 0.1308, 0.1244, 0.1169, 0.1251, 0.1290, 0.1237, 0.1295],
        [0.1291, 0.1247, 0.1169, 0.1203, 0.1264, 0.1300, 0.1273, 0.1252],
        [0.1242, 0.1203, 0.1061, 0.1215, 0.1300, 0.1371, 0.1313, 0.1295],
        [0.1278, 0.1207, 0.1104, 0.1258, 0.1225, 0.1346, 0.1279, 0.1304],
        [0.1187, 0.1340, 0.1259, 0.1208, 0.1302, 0.1232, 0.1279, 0.1194],
        [0.1297, 0.1249, 0.1171, 0.1244, 0.1270, 0.1297, 0.1226, 0.1245],
        [0.1266, 0.1186, 0.1055, 0.1219, 0.1320, 0.1360, 0.1281, 0.1314],
        [0.1276, 0.1176, 0.1058, 0.1195, 0.1269, 0.1379, 0.1305, 0.1340],
        [0.1377, 0.1146, 0.1041, 0.1177, 0.1275, 0.1431, 0.1229, 0.1324]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 6 [   0/390]  Loss: 0.6668 (0.667)  Acc@1: 71.8750 (71.8750)  Acc@5: 98.4375 (98.4375)LR: 2.416e-02
Train: 6 [  50/390]  Loss: 0.7502 (0.648)  Acc@1: 78.1250 (78.2475)  Acc@5: 98.4375 (98.4069)LR: 2.416e-02
Train: 6 [ 100/390]  Loss: 0.7347 (0.676)  Acc@1: 73.4375 (77.1040)  Acc@5: 100.0000 (98.4066)LR: 2.416e-02
Train: 6 [ 150/390]  Loss: 1.006 (0.679)  Acc@1: 64.0625 (76.8729)  Acc@5: 98.4375 (98.5203)LR: 2.416e-02
Train: 6 [ 200/390]  Loss: 0.5429 (0.670)  Acc@1: 81.2500 (77.0911)  Acc@5: 100.0000 (98.5852)LR: 2.416e-02
Train: 6 [ 250/390]  Loss: 0.6353 (0.665)  Acc@1: 73.4375 (77.2908)  Acc@5: 98.4375 (98.6492)LR: 2.416e-02
Train: 6 [ 300/390]  Loss: 0.5837 (0.665)  Acc@1: 73.4375 (77.1647)  Acc@5: 100.0000 (98.7126)LR: 2.416e-02
Train: 6 [ 350/390]  Loss: 0.6121 (0.659)  Acc@1: 78.1250 (77.2480)  Acc@5: 100.0000 (98.7269)LR: 2.416e-02
Train: 6 [ 390/390]  Loss: 0.5611 (0.657)  Acc@1: 80.0000 (77.3480)  Acc@5: 97.5000 (98.7240)LR: 2.416e-02
train_acc 77.348000
Valid: 6 [   0/390]  Loss: 0.4380 (0.438)  Acc@1: 85.9375 (85.9375)  Acc@5: 100.0000 (100.0000)
Valid: 6 [  50/390]  Loss: 0.6524 (0.606)  Acc@1: 78.1250 (79.4118)  Acc@5: 100.0000 (98.7132)
Valid: 6 [ 100/390]  Loss: 0.8143 (0.635)  Acc@1: 73.4375 (78.2333)  Acc@5: 96.8750 (98.6541)
Valid: 6 [ 150/390]  Loss: 0.5650 (0.634)  Acc@1: 79.6875 (77.8974)  Acc@5: 96.8750 (98.7065)
Valid: 6 [ 200/390]  Loss: 0.4463 (0.634)  Acc@1: 84.3750 (77.9151)  Acc@5: 98.4375 (98.7096)
Valid: 6 [ 250/390]  Loss: 0.5596 (0.638)  Acc@1: 85.9375 (77.6706)  Acc@5: 98.4375 (98.6554)
Valid: 6 [ 300/390]  Loss: 0.5307 (0.642)  Acc@1: 76.5625 (77.6474)  Acc@5: 100.0000 (98.6348)
Valid: 6 [ 350/390]  Loss: 0.8548 (0.642)  Acc@1: 70.3125 (77.6576)  Acc@5: 96.8750 (98.6200)
Valid: 6 [ 390/390]  Loss: 0.5490 (0.640)  Acc@1: 82.5000 (77.7280)  Acc@5: 92.5000 (98.6200)
valid_acc 77.728000
epoch = 6   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('sep_conv_5x5', 2), ('sep_conv_5x5', 0), ('dil_conv_3x3', 2), ('dil_conv_3x3', 4), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 2), ('sep_conv_5x5', 3), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1289, 0.1253, 0.1011, 0.1152, 0.1481, 0.1316, 0.1248, 0.1251],
        [0.1368, 0.1147, 0.1006, 0.1141, 0.1335, 0.1390, 0.1288, 0.1326],
        [0.1304, 0.1216, 0.0996, 0.1108, 0.1421, 0.1458, 0.1273, 0.1223],
        [0.1407, 0.1125, 0.1011, 0.1128, 0.1302, 0.1294, 0.1412, 0.1321],
        [0.1372, 0.1068, 0.0955, 0.1086, 0.1341, 0.1444, 0.1369, 0.1365],
        [0.1314, 0.1207, 0.0960, 0.1050, 0.1358, 0.1447, 0.1303, 0.1360],
        [0.1383, 0.1139, 0.0995, 0.1121, 0.1363, 0.1397, 0.1251, 0.1350],
        [0.1464, 0.1050, 0.0921, 0.1060, 0.1385, 0.1361, 0.1414, 0.1345],
        [0.1470, 0.1037, 0.0941, 0.1045, 0.1389, 0.1387, 0.1318, 0.1412],
        [0.1418, 0.1206, 0.0995, 0.1091, 0.1353, 0.1356, 0.1248, 0.1334],
        [0.1442, 0.1111, 0.0981, 0.1093, 0.1392, 0.1333, 0.1347, 0.1301],
        [0.1515, 0.1048, 0.0941, 0.1086, 0.1352, 0.1363, 0.1342, 0.1353],
        [0.1500, 0.1061, 0.0957, 0.1052, 0.1345, 0.1378, 0.1322, 0.1386],
        [0.1497, 0.1021, 0.0943, 0.1023, 0.1340, 0.1383, 0.1425, 0.1368]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1217, 0.1368, 0.1249, 0.1198, 0.1236, 0.1271, 0.1190, 0.1270],
        [0.1277, 0.1262, 0.1152, 0.1273, 0.1257, 0.1322, 0.1224, 0.1233],
        [0.1198, 0.1378, 0.1270, 0.1226, 0.1284, 0.1213, 0.1190, 0.1240],
        [0.1256, 0.1243, 0.1142, 0.1263, 0.1244, 0.1273, 0.1303, 0.1278],
        [0.1271, 0.1205, 0.1042, 0.1216, 0.1355, 0.1253, 0.1301, 0.1358],
        [0.1215, 0.1314, 0.1247, 0.1151, 0.1254, 0.1291, 0.1225, 0.1303],
        [0.1277, 0.1253, 0.1174, 0.1207, 0.1270, 0.1307, 0.1269, 0.1244],
        [0.1233, 0.1189, 0.1041, 0.1208, 0.1306, 0.1394, 0.1324, 0.1306],
        [0.1280, 0.1186, 0.1092, 0.1264, 0.1222, 0.1341, 0.1295, 0.1320],
        [0.1183, 0.1338, 0.1254, 0.1214, 0.1299, 0.1233, 0.1280, 0.1199],
        [0.1284, 0.1255, 0.1170, 0.1248, 0.1280, 0.1305, 0.1218, 0.1240],
        [0.1276, 0.1168, 0.1029, 0.1217, 0.1324, 0.1381, 0.1280, 0.1325],
        [0.1281, 0.1147, 0.1036, 0.1186, 0.1261, 0.1409, 0.1323, 0.1356],
        [0.1409, 0.1124, 0.1017, 0.1169, 0.1277, 0.1460, 0.1220, 0.1324]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 7 [   0/390]  Loss: 0.5464 (0.546)  Acc@1: 81.2500 (81.2500)  Acc@5: 100.0000 (100.0000)LR: 2.386e-02
Train: 7 [  50/390]  Loss: 0.6103 (0.611)  Acc@1: 75.0000 (78.2169)  Acc@5: 98.4375 (99.2341)LR: 2.386e-02
Train: 7 [ 100/390]  Loss: 0.4630 (0.608)  Acc@1: 84.3750 (78.6355)  Acc@5: 98.4375 (98.9480)LR: 2.386e-02
Train: 7 [ 150/390]  Loss: 0.5266 (0.615)  Acc@1: 81.2500 (78.5286)  Acc@5: 100.0000 (98.9652)LR: 2.386e-02
Train: 7 [ 200/390]  Loss: 0.8022 (0.609)  Acc@1: 71.8750 (78.7702)  Acc@5: 98.4375 (98.9817)LR: 2.386e-02
Train: 7 [ 250/390]  Loss: 0.5768 (0.605)  Acc@1: 84.3750 (78.8969)  Acc@5: 100.0000 (98.9480)LR: 2.386e-02
Train: 7 [ 300/390]  Loss: 0.5925 (0.612)  Acc@1: 81.2500 (78.6181)  Acc@5: 98.4375 (98.8839)LR: 2.386e-02
Train: 7 [ 350/390]  Loss: 0.5580 (0.612)  Acc@1: 82.8125 (78.6325)  Acc@5: 98.4375 (98.8604)LR: 2.386e-02
Train: 7 [ 390/390]  Loss: 0.7665 (0.613)  Acc@1: 70.0000 (78.5840)  Acc@5: 97.5000 (98.8680)LR: 2.386e-02
train_acc 78.584000
Valid: 7 [   0/390]  Loss: 0.9078 (0.908)  Acc@1: 73.4375 (73.4375)  Acc@5: 95.3125 (95.3125)
Valid: 7 [  50/390]  Loss: 0.8646 (0.640)  Acc@1: 75.0000 (77.6961)  Acc@5: 95.3125 (98.6826)
Valid: 7 [ 100/390]  Loss: 0.6107 (0.651)  Acc@1: 78.1250 (77.8465)  Acc@5: 100.0000 (98.4839)
Valid: 7 [ 150/390]  Loss: 0.7481 (0.650)  Acc@1: 73.4375 (77.7732)  Acc@5: 98.4375 (98.5410)
Valid: 7 [ 200/390]  Loss: 0.5434 (0.644)  Acc@1: 82.8125 (78.1095)  Acc@5: 98.4375 (98.5386)
Valid: 7 [ 250/390]  Loss: 0.6259 (0.642)  Acc@1: 73.4375 (78.1312)  Acc@5: 98.4375 (98.5620)
Valid: 7 [ 300/390]  Loss: 0.7781 (0.642)  Acc@1: 75.0000 (78.0316)  Acc@5: 95.3125 (98.5309)
Valid: 7 [ 350/390]  Loss: 0.4000 (0.640)  Acc@1: 85.9375 (78.2452)  Acc@5: 100.0000 (98.5933)
Valid: 7 [ 390/390]  Loss: 0.5986 (0.643)  Acc@1: 80.0000 (78.1240)  Acc@5: 100.0000 (98.5880)
valid_acc 78.124000
epoch = 7   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('sep_conv_5x5', 2), ('sep_conv_5x5', 0), ('dil_conv_5x5', 3), ('dil_conv_3x3', 4), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 2), ('sep_conv_5x5', 3), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1299, 0.1235, 0.0987, 0.1134, 0.1533, 0.1316, 0.1240, 0.1258],
        [0.1385, 0.1120, 0.0981, 0.1128, 0.1330, 0.1406, 0.1309, 0.1342],
        [0.1313, 0.1195, 0.0972, 0.1092, 0.1435, 0.1503, 0.1278, 0.1212],
        [0.1430, 0.1093, 0.0984, 0.1111, 0.1318, 0.1295, 0.1447, 0.1322],
        [0.1386, 0.1037, 0.0928, 0.1068, 0.1362, 0.1466, 0.1366, 0.1387],
        [0.1333, 0.1178, 0.0933, 0.1029, 0.1382, 0.1459, 0.1306, 0.1379],
        [0.1412, 0.1108, 0.0969, 0.1107, 0.1386, 0.1406, 0.1247, 0.1364],
        [0.1509, 0.1013, 0.0885, 0.1038, 0.1395, 0.1387, 0.1423, 0.1349],
        [0.1506, 0.1005, 0.0907, 0.1019, 0.1407, 0.1398, 0.1320, 0.1437],
        [0.1453, 0.1189, 0.0974, 0.1079, 0.1366, 0.1337, 0.1263, 0.1340],
        [0.1481, 0.1080, 0.0956, 0.1076, 0.1407, 0.1322, 0.1371, 0.1308],
        [0.1560, 0.1013, 0.0909, 0.1066, 0.1369, 0.1377, 0.1350, 0.1356],
        [0.1532, 0.1031, 0.0924, 0.1026, 0.1370, 0.1379, 0.1335, 0.1403],
        [0.1557, 0.0986, 0.0909, 0.0994, 0.1349, 0.1381, 0.1440, 0.1384]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1221, 0.1377, 0.1245, 0.1210, 0.1228, 0.1269, 0.1182, 0.1269],
        [0.1269, 0.1269, 0.1145, 0.1280, 0.1258, 0.1338, 0.1225, 0.1217],
        [0.1189, 0.1388, 0.1271, 0.1232, 0.1283, 0.1211, 0.1185, 0.1242],
        [0.1252, 0.1252, 0.1144, 0.1262, 0.1241, 0.1278, 0.1300, 0.1271],
        [0.1282, 0.1195, 0.1026, 0.1220, 0.1367, 0.1239, 0.1300, 0.1371],
        [0.1216, 0.1321, 0.1243, 0.1150, 0.1260, 0.1278, 0.1221, 0.1312],
        [0.1267, 0.1256, 0.1171, 0.1196, 0.1273, 0.1316, 0.1273, 0.1247],
        [0.1251, 0.1171, 0.1021, 0.1214, 0.1305, 0.1406, 0.1316, 0.1317],
        [0.1289, 0.1161, 0.1070, 0.1258, 0.1225, 0.1355, 0.1315, 0.1327],
        [0.1174, 0.1340, 0.1244, 0.1218, 0.1299, 0.1241, 0.1282, 0.1201],
        [0.1276, 0.1268, 0.1178, 0.1243, 0.1289, 0.1299, 0.1208, 0.1240],
        [0.1275, 0.1161, 0.1015, 0.1223, 0.1328, 0.1387, 0.1290, 0.1319],
        [0.1291, 0.1129, 0.1018, 0.1188, 0.1257, 0.1428, 0.1332, 0.1357],
        [0.1420, 0.1107, 0.0995, 0.1162, 0.1284, 0.1486, 0.1222, 0.1324]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 8 [   0/390]  Loss: 0.5366 (0.537)  Acc@1: 79.6875 (79.6875)  Acc@5: 100.0000 (100.0000)LR: 2.352e-02
Train: 8 [  50/390]  Loss: 0.6489 (0.575)  Acc@1: 79.6875 (79.7181)  Acc@5: 98.4375 (98.8051)LR: 2.352e-02
Train: 8 [ 100/390]  Loss: 0.7394 (0.585)  Acc@1: 76.5625 (79.3626)  Acc@5: 95.3125 (98.8243)LR: 2.352e-02
Train: 8 [ 150/390]  Loss: 0.5830 (0.589)  Acc@1: 85.9375 (79.2632)  Acc@5: 98.4375 (98.9031)LR: 2.352e-02
Train: 8 [ 200/390]  Loss: 0.4885 (0.579)  Acc@1: 76.5625 (79.6098)  Acc@5: 100.0000 (98.9817)LR: 2.352e-02
Train: 8 [ 250/390]  Loss: 0.7335 (0.581)  Acc@1: 70.3125 (79.6190)  Acc@5: 98.4375 (98.9915)LR: 2.352e-02
Train: 8 [ 300/390]  Loss: 0.4340 (0.578)  Acc@1: 81.2500 (79.8173)  Acc@5: 98.4375 (99.0137)LR: 2.352e-02
Train: 8 [ 350/390]  Loss: 0.7047 (0.583)  Acc@1: 75.0000 (79.7009)  Acc@5: 98.4375 (98.9494)LR: 2.352e-02
Train: 8 [ 390/390]  Loss: 0.5225 (0.579)  Acc@1: 77.5000 (79.8400)  Acc@5: 100.0000 (98.9720)LR: 2.352e-02
train_acc 79.840000
Valid: 8 [   0/390]  Loss: 1.089 (1.09)  Acc@1: 70.3125 (70.3125)  Acc@5: 96.8750 (96.8750)
Valid: 8 [  50/390]  Loss: 0.7910 (0.668)  Acc@1: 68.7500 (77.3591)  Acc@5: 100.0000 (99.0502)
Valid: 8 [ 100/390]  Loss: 0.6990 (0.649)  Acc@1: 75.0000 (77.7692)  Acc@5: 98.4375 (98.8861)
Valid: 8 [ 150/390]  Loss: 0.5802 (0.652)  Acc@1: 75.0000 (77.6076)  Acc@5: 100.0000 (98.9031)
Valid: 8 [ 200/390]  Loss: 0.4920 (0.665)  Acc@1: 84.3750 (77.0756)  Acc@5: 98.4375 (98.8417)
Valid: 8 [ 250/390]  Loss: 0.6766 (0.660)  Acc@1: 78.1250 (77.2535)  Acc@5: 98.4375 (98.8235)
Valid: 8 [ 300/390]  Loss: 0.8048 (0.655)  Acc@1: 73.4375 (77.3308)  Acc@5: 96.8750 (98.8164)
Valid: 8 [ 350/390]  Loss: 0.4183 (0.654)  Acc@1: 85.9375 (77.4528)  Acc@5: 100.0000 (98.8070)
Valid: 8 [ 390/390]  Loss: 0.6158 (0.656)  Acc@1: 70.0000 (77.3960)  Acc@5: 100.0000 (98.8040)
valid_acc 77.396000
epoch = 8   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('sep_conv_5x5', 2), ('sep_conv_5x5', 0), ('dil_conv_5x5', 3), ('dil_conv_3x3', 4), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_5x5', 3), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1310, 0.1217, 0.0959, 0.1120, 0.1585, 0.1311, 0.1227, 0.1272],
        [0.1397, 0.1107, 0.0961, 0.1124, 0.1342, 0.1401, 0.1315, 0.1352],
        [0.1327, 0.1174, 0.0945, 0.1076, 0.1460, 0.1520, 0.1283, 0.1214],
        [0.1444, 0.1077, 0.0962, 0.1101, 0.1323, 0.1295, 0.1472, 0.1325],
        [0.1405, 0.1008, 0.0901, 0.1056, 0.1381, 0.1475, 0.1368, 0.1407],
        [0.1358, 0.1157, 0.0903, 0.1007, 0.1398, 0.1480, 0.1306, 0.1390],
        [0.1427, 0.1094, 0.0947, 0.1098, 0.1397, 0.1414, 0.1253, 0.1371],
        [0.1553, 0.0981, 0.0854, 0.1020, 0.1420, 0.1383, 0.1432, 0.1358],
        [0.1550, 0.0973, 0.0874, 0.0997, 0.1430, 0.1413, 0.1307, 0.1456],
        [0.1488, 0.1171, 0.0948, 0.1063, 0.1375, 0.1347, 0.1264, 0.1344],
        [0.1516, 0.1060, 0.0930, 0.1059, 0.1416, 0.1332, 0.1374, 0.1314],
        [0.1611, 0.0980, 0.0873, 0.1044, 0.1392, 0.1377, 0.1363, 0.1359],
        [0.1565, 0.0998, 0.0887, 0.0998, 0.1398, 0.1381, 0.1345, 0.1428],
        [0.1607, 0.0951, 0.0870, 0.0962, 0.1369, 0.1398, 0.1452, 0.1390]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1216, 0.1372, 0.1233, 0.1205, 0.1245, 0.1287, 0.1174, 0.1268],
        [0.1270, 0.1249, 0.1119, 0.1282, 0.1269, 0.1335, 0.1244, 0.1233],
        [0.1192, 0.1388, 0.1268, 0.1230, 0.1276, 0.1217, 0.1184, 0.1246],
        [0.1251, 0.1236, 0.1126, 0.1261, 0.1255, 0.1307, 0.1306, 0.1258],
        [0.1272, 0.1195, 0.1011, 0.1217, 0.1358, 0.1248, 0.1308, 0.1390],
        [0.1222, 0.1316, 0.1241, 0.1138, 0.1269, 0.1279, 0.1226, 0.1310],
        [0.1278, 0.1242, 0.1155, 0.1187, 0.1282, 0.1342, 0.1266, 0.1248],
        [0.1247, 0.1156, 0.0998, 0.1207, 0.1312, 0.1421, 0.1321, 0.1338],
        [0.1290, 0.1135, 0.1044, 0.1243, 0.1245, 0.1370, 0.1334, 0.1339],
        [0.1165, 0.1335, 0.1235, 0.1218, 0.1304, 0.1261, 0.1293, 0.1190],
        [0.1281, 0.1262, 0.1164, 0.1243, 0.1293, 0.1307, 0.1205, 0.1247],
        [0.1274, 0.1161, 0.0998, 0.1230, 0.1325, 0.1399, 0.1301, 0.1312],
        [0.1303, 0.1112, 0.0994, 0.1187, 0.1254, 0.1449, 0.1343, 0.1358],
        [0.1426, 0.1088, 0.0969, 0.1156, 0.1289, 0.1517, 0.1224, 0.1331]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 9 [   0/390]  Loss: 0.4141 (0.414)  Acc@1: 82.8125 (82.8125)  Acc@5: 100.0000 (100.0000)LR: 2.313e-02
Train: 9 [  50/390]  Loss: 0.7128 (0.552)  Acc@1: 79.6875 (80.6985)  Acc@5: 98.4375 (99.0809)LR: 2.313e-02
Train: 9 [ 100/390]  Loss: 0.5162 (0.571)  Acc@1: 84.3750 (79.9350)  Acc@5: 100.0000 (99.1182)LR: 2.313e-02
Train: 9 [ 150/390]  Loss: 0.3730 (0.562)  Acc@1: 85.9375 (80.3394)  Acc@5: 100.0000 (99.1618)LR: 2.313e-02
Train: 9 [ 200/390]  Loss: 0.7564 (0.558)  Acc@1: 71.8750 (80.6592)  Acc@5: 98.4375 (99.0905)LR: 2.313e-02
Train: 9 [ 250/390]  Loss: 0.6367 (0.562)  Acc@1: 81.2500 (80.5092)  Acc@5: 96.8750 (99.0351)LR: 2.313e-02
Train: 9 [ 300/390]  Loss: 0.4988 (0.560)  Acc@1: 81.2500 (80.4921)  Acc@5: 100.0000 (99.0552)LR: 2.313e-02
Train: 9 [ 350/390]  Loss: 0.3266 (0.564)  Acc@1: 84.3750 (80.3152)  Acc@5: 100.0000 (99.0340)LR: 2.313e-02
Train: 9 [ 390/390]  Loss: 0.6253 (0.564)  Acc@1: 85.0000 (80.2880)  Acc@5: 97.5000 (99.0480)LR: 2.313e-02
train_acc 80.288000
Valid: 9 [   0/390]  Loss: 0.4585 (0.458)  Acc@1: 84.3750 (84.3750)  Acc@5: 100.0000 (100.0000)
Valid: 9 [  50/390]  Loss: 0.5840 (0.624)  Acc@1: 84.3750 (78.4926)  Acc@5: 98.4375 (98.7439)
Valid: 9 [ 100/390]  Loss: 0.5844 (0.613)  Acc@1: 79.6875 (78.8676)  Acc@5: 98.4375 (98.8552)
Valid: 9 [ 150/390]  Loss: 0.6573 (0.622)  Acc@1: 81.2500 (78.8700)  Acc@5: 96.8750 (98.7169)
Valid: 9 [ 200/390]  Loss: 0.6648 (0.622)  Acc@1: 78.1250 (78.7858)  Acc@5: 98.4375 (98.7484)
Valid: 9 [ 250/390]  Loss: 0.5933 (0.620)  Acc@1: 81.2500 (78.8160)  Acc@5: 96.8750 (98.7363)
Valid: 9 [ 300/390]  Loss: 0.8600 (0.616)  Acc@1: 67.1875 (78.9452)  Acc@5: 98.4375 (98.7074)
Valid: 9 [ 350/390]  Loss: 0.6865 (0.618)  Acc@1: 75.0000 (78.8506)  Acc@5: 96.8750 (98.7046)
Valid: 9 [ 390/390]  Loss: 0.4968 (0.621)  Acc@1: 80.0000 (78.8920)  Acc@5: 97.5000 (98.6360)
valid_acc 78.892000
epoch = 9   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('dil_conv_3x3', 1), ('sep_conv_5x5', 0), ('dil_conv_5x5', 3), ('dil_conv_3x3', 4), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 2), ('sep_conv_5x5', 3), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1322, 0.1199, 0.0935, 0.1107, 0.1608, 0.1326, 0.1230, 0.1272],
        [0.1402, 0.1093, 0.0939, 0.1114, 0.1350, 0.1413, 0.1329, 0.1359],
        [0.1344, 0.1156, 0.0920, 0.1061, 0.1483, 0.1530, 0.1299, 0.1207],
        [0.1475, 0.1062, 0.0939, 0.1091, 0.1317, 0.1294, 0.1506, 0.1315],
        [0.1415, 0.0984, 0.0873, 0.1045, 0.1389, 0.1489, 0.1374, 0.1431],
        [0.1378, 0.1141, 0.0879, 0.0993, 0.1429, 0.1480, 0.1305, 0.1396],
        [0.1452, 0.1080, 0.0917, 0.1080, 0.1409, 0.1417, 0.1273, 0.1372],
        [0.1589, 0.0957, 0.0823, 0.1007, 0.1429, 0.1396, 0.1449, 0.1351],
        [0.1600, 0.0941, 0.0837, 0.0971, 0.1452, 0.1431, 0.1305, 0.1462],
        [0.1527, 0.1147, 0.0917, 0.1044, 0.1375, 0.1358, 0.1276, 0.1356],
        [0.1544, 0.1040, 0.0901, 0.1040, 0.1431, 0.1343, 0.1387, 0.1313],
        [0.1667, 0.0952, 0.0838, 0.1034, 0.1419, 0.1360, 0.1372, 0.1357],
        [0.1618, 0.0966, 0.0847, 0.0972, 0.1419, 0.1382, 0.1352, 0.1445],
        [0.1668, 0.0918, 0.0836, 0.0938, 0.1376, 0.1403, 0.1465, 0.1395]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1220, 0.1378, 0.1228, 0.1201, 0.1234, 0.1274, 0.1184, 0.1282],
        [0.1256, 0.1240, 0.1107, 0.1291, 0.1273, 0.1347, 0.1256, 0.1230],
        [0.1187, 0.1397, 0.1269, 0.1230, 0.1276, 0.1221, 0.1187, 0.1233],
        [0.1253, 0.1238, 0.1126, 0.1259, 0.1253, 0.1330, 0.1306, 0.1235],
        [0.1262, 0.1189, 0.1003, 0.1218, 0.1351, 0.1260, 0.1324, 0.1393],
        [0.1225, 0.1325, 0.1248, 0.1136, 0.1277, 0.1257, 0.1217, 0.1314],
        [0.1277, 0.1237, 0.1152, 0.1193, 0.1287, 0.1351, 0.1266, 0.1237],
        [0.1237, 0.1147, 0.0989, 0.1213, 0.1323, 0.1420, 0.1325, 0.1345],
        [0.1300, 0.1120, 0.1033, 0.1247, 0.1241, 0.1374, 0.1339, 0.1346],
        [0.1161, 0.1337, 0.1231, 0.1226, 0.1298, 0.1256, 0.1302, 0.1189],
        [0.1275, 0.1258, 0.1160, 0.1247, 0.1298, 0.1301, 0.1205, 0.1256],
        [0.1277, 0.1152, 0.0991, 0.1241, 0.1327, 0.1401, 0.1299, 0.1311],
        [0.1313, 0.1093, 0.0981, 0.1190, 0.1233, 0.1457, 0.1356, 0.1377],
        [0.1453, 0.1070, 0.0956, 0.1157, 0.1277, 0.1535, 0.1218, 0.1335]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 10 [   0/390]  Loss: 0.4217 (0.422)  Acc@1: 84.3750 (84.3750)  Acc@5: 98.4375 (98.4375)LR: 2.271e-02
Train: 10 [  50/390]  Loss: 0.4547 (0.491)  Acc@1: 87.5000 (82.9963)  Acc@5: 100.0000 (99.1728)LR: 2.271e-02
Train: 10 [ 100/390]  Loss: 0.5555 (0.520)  Acc@1: 75.0000 (81.8533)  Acc@5: 100.0000 (99.2110)LR: 2.271e-02
Train: 10 [ 150/390]  Loss: 0.5722 (0.533)  Acc@1: 79.6875 (81.3949)  Acc@5: 100.0000 (99.1308)LR: 2.271e-02
Train: 10 [ 200/390]  Loss: 0.6429 (0.534)  Acc@1: 76.5625 (81.2345)  Acc@5: 96.8750 (99.1682)LR: 2.271e-02
Train: 10 [ 250/390]  Loss: 0.6519 (0.540)  Acc@1: 78.1250 (81.1753)  Acc@5: 98.4375 (99.1347)LR: 2.271e-02
Train: 10 [ 300/390]  Loss: 0.4622 (0.536)  Acc@1: 81.2500 (81.3382)  Acc@5: 100.0000 (99.1175)LR: 2.271e-02
Train: 10 [ 350/390]  Loss: 0.6381 (0.534)  Acc@1: 76.5625 (81.3746)  Acc@5: 98.4375 (99.1364)LR: 2.271e-02
Train: 10 [ 390/390]  Loss: 0.3634 (0.532)  Acc@1: 85.0000 (81.4280)  Acc@5: 100.0000 (99.1560)LR: 2.271e-02
train_acc 81.428000
Valid: 10 [   0/390]  Loss: 0.4848 (0.485)  Acc@1: 79.6875 (79.6875)  Acc@5: 100.0000 (100.0000)
Valid: 10 [  50/390]  Loss: 0.7351 (0.635)  Acc@1: 75.0000 (78.7377)  Acc@5: 100.0000 (98.7439)
Valid: 10 [ 100/390]  Loss: 0.3116 (0.601)  Acc@1: 89.0625 (79.5637)  Acc@5: 98.4375 (98.8861)
Valid: 10 [ 150/390]  Loss: 0.7021 (0.595)  Acc@1: 78.1250 (79.7496)  Acc@5: 100.0000 (98.9031)
Valid: 10 [ 200/390]  Loss: 0.6022 (0.595)  Acc@1: 75.0000 (79.9596)  Acc@5: 100.0000 (98.8495)
Valid: 10 [ 250/390]  Loss: 0.7515 (0.596)  Acc@1: 73.4375 (79.7684)  Acc@5: 100.0000 (98.8733)
Valid: 10 [ 300/390]  Loss: 0.4329 (0.597)  Acc@1: 85.9375 (79.6719)  Acc@5: 100.0000 (98.8735)
Valid: 10 [ 350/390]  Loss: 0.6406 (0.595)  Acc@1: 79.6875 (79.7231)  Acc@5: 98.4375 (98.8337)
Valid: 10 [ 390/390]  Loss: 0.4425 (0.593)  Acc@1: 85.0000 (79.8240)  Acc@5: 100.0000 (98.8400)
valid_acc 79.824000
epoch = 10   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('dil_conv_3x3', 1), ('sep_conv_5x5', 0), ('dil_conv_3x3', 2), ('dil_conv_3x3', 4), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_5x5', 3), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1323, 0.1182, 0.0919, 0.1102, 0.1628, 0.1334, 0.1229, 0.1283],
        [0.1423, 0.1071, 0.0913, 0.1100, 0.1359, 0.1429, 0.1332, 0.1374],
        [0.1358, 0.1137, 0.0907, 0.1057, 0.1497, 0.1545, 0.1301, 0.1198],
        [0.1512, 0.1039, 0.0917, 0.1077, 0.1323, 0.1296, 0.1507, 0.1330],
        [0.1436, 0.0954, 0.0845, 0.1032, 0.1407, 0.1496, 0.1391, 0.1438],
        [0.1393, 0.1115, 0.0858, 0.0976, 0.1448, 0.1487, 0.1323, 0.1400],
        [0.1481, 0.1056, 0.0889, 0.1061, 0.1412, 0.1432, 0.1284, 0.1386],
        [0.1641, 0.0924, 0.0790, 0.0990, 0.1429, 0.1414, 0.1468, 0.1344],
        [0.1660, 0.0905, 0.0803, 0.0945, 0.1467, 0.1447, 0.1314, 0.1458],
        [0.1573, 0.1130, 0.0903, 0.1042, 0.1379, 0.1364, 0.1263, 0.1346],
        [0.1583, 0.1011, 0.0874, 0.1022, 0.1445, 0.1344, 0.1399, 0.1321],
        [0.1718, 0.0921, 0.0807, 0.1019, 0.1439, 0.1380, 0.1365, 0.1352],
        [0.1666, 0.0928, 0.0811, 0.0946, 0.1443, 0.1381, 0.1368, 0.1455],
        [0.1739, 0.0885, 0.0802, 0.0910, 0.1387, 0.1415, 0.1464, 0.1398]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1220, 0.1369, 0.1212, 0.1193, 0.1242, 0.1274, 0.1191, 0.1299],
        [0.1250, 0.1235, 0.1095, 0.1307, 0.1273, 0.1359, 0.1255, 0.1226],
        [0.1185, 0.1398, 0.1263, 0.1219, 0.1279, 0.1221, 0.1191, 0.1244],
        [0.1251, 0.1231, 0.1117, 0.1261, 0.1258, 0.1339, 0.1310, 0.1233],
        [0.1257, 0.1177, 0.0994, 0.1219, 0.1344, 0.1254, 0.1352, 0.1404],
        [0.1228, 0.1328, 0.1242, 0.1128, 0.1280, 0.1246, 0.1220, 0.1329],
        [0.1284, 0.1236, 0.1149, 0.1196, 0.1282, 0.1348, 0.1258, 0.1247],
        [0.1233, 0.1135, 0.0977, 0.1213, 0.1337, 0.1430, 0.1323, 0.1352],
        [0.1312, 0.1105, 0.1023, 0.1250, 0.1250, 0.1374, 0.1349, 0.1338],
        [0.1160, 0.1332, 0.1221, 0.1232, 0.1309, 0.1259, 0.1301, 0.1187],
        [0.1269, 0.1249, 0.1146, 0.1252, 0.1317, 0.1304, 0.1206, 0.1257],
        [0.1277, 0.1143, 0.0980, 0.1247, 0.1336, 0.1414, 0.1299, 0.1305],
        [0.1310, 0.1079, 0.0969, 0.1187, 0.1241, 0.1462, 0.1376, 0.1377],
        [0.1465, 0.1057, 0.0943, 0.1159, 0.1263, 0.1551, 0.1207, 0.1354]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 11 [   0/390]  Loss: 0.4603 (0.460)  Acc@1: 87.5000 (87.5000)  Acc@5: 98.4375 (98.4375)LR: 2.225e-02
Train: 11 [  50/390]  Loss: 0.6493 (0.522)  Acc@1: 73.4375 (82.3836)  Acc@5: 100.0000 (99.1115)LR: 2.225e-02
Train: 11 [ 100/390]  Loss: 0.3804 (0.519)  Acc@1: 87.5000 (82.3484)  Acc@5: 100.0000 (99.1182)LR: 2.225e-02
Train: 11 [ 150/390]  Loss: 0.3310 (0.517)  Acc@1: 85.9375 (82.2227)  Acc@5: 100.0000 (99.1722)LR: 2.225e-02
Train: 11 [ 200/390]  Loss: 0.5493 (0.516)  Acc@1: 81.2500 (82.0818)  Acc@5: 100.0000 (99.1682)LR: 2.225e-02
Train: 11 [ 250/390]  Loss: 0.5302 (0.513)  Acc@1: 79.6875 (82.2771)  Acc@5: 100.0000 (99.2094)LR: 2.225e-02
Train: 11 [ 300/390]  Loss: 0.5596 (0.514)  Acc@1: 79.6875 (82.1844)  Acc@5: 100.0000 (99.2162)LR: 2.225e-02
Train: 11 [ 350/390]  Loss: 0.5169 (0.511)  Acc@1: 81.2500 (82.2872)  Acc@5: 100.0000 (99.2388)LR: 2.225e-02
Train: 11 [ 390/390]  Loss: 0.5036 (0.513)  Acc@1: 80.0000 (82.2280)  Acc@5: 100.0000 (99.2240)LR: 2.225e-02
train_acc 82.228000
Valid: 11 [   0/390]  Loss: 0.8001 (0.800)  Acc@1: 75.0000 (75.0000)  Acc@5: 98.4375 (98.4375)
Valid: 11 [  50/390]  Loss: 0.5477 (0.541)  Acc@1: 79.6875 (82.0159)  Acc@5: 100.0000 (98.9890)
Valid: 11 [ 100/390]  Loss: 0.6723 (0.551)  Acc@1: 82.8125 (81.6677)  Acc@5: 96.8750 (98.9016)
Valid: 11 [ 150/390]  Loss: 0.5578 (0.548)  Acc@1: 84.3750 (81.4259)  Acc@5: 96.8750 (98.9445)
Valid: 11 [ 200/390]  Loss: 0.7377 (0.549)  Acc@1: 73.4375 (81.3511)  Acc@5: 98.4375 (98.9195)
Valid: 11 [ 250/390]  Loss: 0.5970 (0.542)  Acc@1: 78.1250 (81.5364)  Acc@5: 100.0000 (98.9168)
Valid: 11 [ 300/390]  Loss: 0.6742 (0.540)  Acc@1: 82.8125 (81.6705)  Acc@5: 96.8750 (98.9514)
Valid: 11 [ 350/390]  Loss: 0.5251 (0.538)  Acc@1: 76.5625 (81.6952)  Acc@5: 100.0000 (98.9583)
Valid: 11 [ 390/390]  Loss: 0.5440 (0.535)  Acc@1: 82.5000 (81.7760)  Acc@5: 100.0000 (98.9880)
valid_acc 81.776000
epoch = 11   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('dil_conv_3x3', 1), ('sep_conv_5x5', 0), ('dil_conv_3x3', 2), ('dil_conv_3x3', 4), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_5x5', 3), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1322, 0.1156, 0.0893, 0.1082, 0.1700, 0.1342, 0.1231, 0.1274],
        [0.1443, 0.1051, 0.0888, 0.1088, 0.1373, 0.1451, 0.1329, 0.1376],
        [0.1368, 0.1111, 0.0887, 0.1045, 0.1526, 0.1555, 0.1319, 0.1189],
        [0.1543, 0.1025, 0.0900, 0.1075, 0.1324, 0.1287, 0.1519, 0.1328],
        [0.1462, 0.0925, 0.0819, 0.1025, 0.1425, 0.1497, 0.1409, 0.1436],
        [0.1425, 0.1089, 0.0838, 0.0968, 0.1447, 0.1488, 0.1333, 0.1413],
        [0.1506, 0.1037, 0.0864, 0.1050, 0.1432, 0.1438, 0.1287, 0.1387],
        [0.1706, 0.0888, 0.0758, 0.0980, 0.1425, 0.1417, 0.1483, 0.1343],
        [0.1720, 0.0871, 0.0771, 0.0921, 0.1483, 0.1449, 0.1320, 0.1465],
        [0.1619, 0.1101, 0.0883, 0.1037, 0.1382, 0.1362, 0.1276, 0.1340],
        [0.1615, 0.0992, 0.0853, 0.1013, 0.1461, 0.1343, 0.1404, 0.1321],
        [0.1768, 0.0884, 0.0775, 0.1004, 0.1464, 0.1386, 0.1358, 0.1362],
        [0.1734, 0.0894, 0.0780, 0.0925, 0.1458, 0.1380, 0.1365, 0.1464],
        [0.1806, 0.0848, 0.0769, 0.0883, 0.1401, 0.1409, 0.1490, 0.1395]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1213, 0.1380, 0.1218, 0.1202, 0.1232, 0.1268, 0.1192, 0.1295],
        [0.1251, 0.1241, 0.1095, 0.1315, 0.1260, 0.1359, 0.1250, 0.1228],
        [0.1170, 0.1407, 0.1273, 0.1217, 0.1280, 0.1218, 0.1186, 0.1249],
        [0.1259, 0.1234, 0.1116, 0.1254, 0.1264, 0.1338, 0.1313, 0.1223],
        [0.1266, 0.1157, 0.0979, 0.1227, 0.1343, 0.1246, 0.1366, 0.1418],
        [0.1216, 0.1338, 0.1255, 0.1119, 0.1296, 0.1238, 0.1217, 0.1322],
        [0.1281, 0.1240, 0.1152, 0.1193, 0.1299, 0.1346, 0.1245, 0.1243],
        [0.1236, 0.1117, 0.0965, 0.1227, 0.1353, 0.1437, 0.1320, 0.1346],
        [0.1331, 0.1082, 0.1006, 0.1256, 0.1243, 0.1376, 0.1364, 0.1342],
        [0.1155, 0.1345, 0.1228, 0.1239, 0.1307, 0.1260, 0.1297, 0.1169],
        [0.1254, 0.1255, 0.1147, 0.1253, 0.1328, 0.1307, 0.1200, 0.1256],
        [0.1284, 0.1124, 0.0962, 0.1259, 0.1339, 0.1422, 0.1297, 0.1313],
        [0.1314, 0.1052, 0.0944, 0.1182, 0.1248, 0.1486, 0.1385, 0.1389],
        [0.1484, 0.1046, 0.0926, 0.1168, 0.1262, 0.1554, 0.1205, 0.1353]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 12 [   0/390]  Loss: 0.3673 (0.367)  Acc@1: 87.5000 (87.5000)  Acc@5: 98.4375 (98.4375)LR: 2.175e-02
Train: 12 [  50/390]  Loss: 0.7636 (0.492)  Acc@1: 73.4375 (82.6287)  Acc@5: 98.4375 (99.3260)LR: 2.175e-02
Train: 12 [ 100/390]  Loss: 0.5409 (0.486)  Acc@1: 78.1250 (82.9363)  Acc@5: 100.0000 (99.3193)LR: 2.175e-02
Train: 12 [ 150/390]  Loss: 0.6589 (0.498)  Acc@1: 70.3125 (82.4607)  Acc@5: 98.4375 (99.2136)LR: 2.175e-02
Train: 12 [ 200/390]  Loss: 0.3509 (0.491)  Acc@1: 87.5000 (82.8280)  Acc@5: 100.0000 (99.2537)LR: 2.175e-02
Train: 12 [ 250/390]  Loss: 0.3865 (0.488)  Acc@1: 84.3750 (82.9495)  Acc@5: 100.0000 (99.2281)LR: 2.175e-02
Train: 12 [ 300/390]  Loss: 0.3415 (0.488)  Acc@1: 85.9375 (83.0253)  Acc@5: 98.4375 (99.2369)LR: 2.175e-02
Train: 12 [ 350/390]  Loss: 0.3916 (0.486)  Acc@1: 89.0625 (83.0440)  Acc@5: 100.0000 (99.2566)LR: 2.175e-02
Train: 12 [ 390/390]  Loss: 0.3598 (0.490)  Acc@1: 87.5000 (83.0040)  Acc@5: 100.0000 (99.2200)LR: 2.175e-02
train_acc 83.004000
Valid: 12 [   0/390]  Loss: 0.9021 (0.902)  Acc@1: 70.3125 (70.3125)  Acc@5: 95.3125 (95.3125)
Valid: 12 [  50/390]  Loss: 0.4618 (0.572)  Acc@1: 81.2500 (80.1777)  Acc@5: 100.0000 (99.0502)
Valid: 12 [ 100/390]  Loss: 0.4105 (0.574)  Acc@1: 87.5000 (80.3682)  Acc@5: 100.0000 (98.9325)
Valid: 12 [ 150/390]  Loss: 0.3731 (0.576)  Acc@1: 87.5000 (80.4325)  Acc@5: 100.0000 (98.9652)
Valid: 12 [ 200/390]  Loss: 0.8439 (0.576)  Acc@1: 73.4375 (80.4027)  Acc@5: 98.4375 (98.9739)
Valid: 12 [ 250/390]  Loss: 0.6300 (0.572)  Acc@1: 76.5625 (80.5279)  Acc@5: 98.4375 (98.9915)
Valid: 12 [ 300/390]  Loss: 0.4190 (0.570)  Acc@1: 87.5000 (80.7309)  Acc@5: 98.4375 (98.9462)
Valid: 12 [ 350/390]  Loss: 0.5194 (0.571)  Acc@1: 81.2500 (80.6357)  Acc@5: 100.0000 (98.9717)
Valid: 12 [ 390/390]  Loss: 0.6642 (0.567)  Acc@1: 80.0000 (80.7600)  Acc@5: 97.5000 (98.9680)
valid_acc 80.760000
epoch = 12   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_5x5', 0), ('dil_conv_3x3', 1), ('dil_conv_3x3', 2), ('sep_conv_5x5', 0), ('dil_conv_3x3', 4), ('sep_conv_3x3', 2)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_5x5', 2), ('dil_conv_3x3', 3), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1326, 0.1141, 0.0871, 0.1078, 0.1742, 0.1337, 0.1227, 0.1279],
        [0.1464, 0.1037, 0.0867, 0.1083, 0.1377, 0.1455, 0.1321, 0.1396],
        [0.1372, 0.1098, 0.0865, 0.1037, 0.1569, 0.1577, 0.1304, 0.1178],
        [0.1576, 0.1013, 0.0881, 0.1073, 0.1331, 0.1294, 0.1518, 0.1313],
        [0.1497, 0.0906, 0.0795, 0.1017, 0.1424, 0.1498, 0.1412, 0.1450],
        [0.1446, 0.1076, 0.0819, 0.0961, 0.1445, 0.1490, 0.1342, 0.1421],
        [0.1539, 0.1018, 0.0840, 0.1040, 0.1437, 0.1450, 0.1292, 0.1384],
        [0.1752, 0.0866, 0.0732, 0.0966, 0.1429, 0.1424, 0.1493, 0.1339],
        [0.1789, 0.0845, 0.0742, 0.0904, 0.1489, 0.1452, 0.1316, 0.1464],
        [0.1658, 0.1080, 0.0858, 0.1025, 0.1385, 0.1365, 0.1285, 0.1345],
        [0.1669, 0.0972, 0.0827, 0.0998, 0.1483, 0.1338, 0.1402, 0.1311],
        [0.1833, 0.0855, 0.0743, 0.0982, 0.1493, 0.1385, 0.1356, 0.1353],
        [0.1805, 0.0862, 0.0745, 0.0897, 0.1475, 0.1374, 0.1373, 0.1468],
        [0.1875, 0.0813, 0.0734, 0.0849, 0.1409, 0.1418, 0.1502, 0.1400]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1215, 0.1382, 0.1208, 0.1206, 0.1234, 0.1269, 0.1196, 0.1290],
        [0.1244, 0.1236, 0.1083, 0.1333, 0.1266, 0.1364, 0.1251, 0.1223],
        [0.1160, 0.1419, 0.1276, 0.1215, 0.1290, 0.1214, 0.1181, 0.1244],
        [0.1267, 0.1237, 0.1112, 0.1242, 0.1261, 0.1353, 0.1313, 0.1215],
        [0.1263, 0.1148, 0.0968, 0.1235, 0.1338, 0.1238, 0.1376, 0.1434],
        [0.1213, 0.1342, 0.1257, 0.1126, 0.1305, 0.1222, 0.1215, 0.1319],
        [0.1286, 0.1240, 0.1149, 0.1190, 0.1321, 0.1342, 0.1240, 0.1232],
        [0.1229, 0.1103, 0.0953, 0.1234, 0.1366, 0.1444, 0.1319, 0.1354],
        [0.1353, 0.1059, 0.0987, 0.1254, 0.1235, 0.1385, 0.1386, 0.1342],
        [0.1142, 0.1351, 0.1223, 0.1240, 0.1312, 0.1279, 0.1287, 0.1166],
        [0.1250, 0.1248, 0.1135, 0.1258, 0.1347, 0.1297, 0.1199, 0.1266],
        [0.1292, 0.1115, 0.0952, 0.1270, 0.1347, 0.1421, 0.1293, 0.1309],
        [0.1318, 0.1027, 0.0920, 0.1171, 0.1247, 0.1500, 0.1403, 0.1414],
        [0.1495, 0.1030, 0.0907, 0.1161, 0.1280, 0.1565, 0.1197, 0.1364]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 13 [   0/390]  Loss: 0.3211 (0.321)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)LR: 2.121e-02
Train: 13 [  50/390]  Loss: 0.4144 (0.465)  Acc@1: 90.6250 (84.1912)  Acc@5: 98.4375 (99.4485)LR: 2.121e-02
Train: 13 [ 100/390]  Loss: 0.4725 (0.464)  Acc@1: 81.2500 (83.9728)  Acc@5: 100.0000 (99.4431)LR: 2.121e-02
Train: 13 [ 150/390]  Loss: 0.4715 (0.479)  Acc@1: 85.9375 (83.3713)  Acc@5: 100.0000 (99.3895)LR: 2.121e-02
Train: 13 [ 200/390]  Loss: 0.3513 (0.482)  Acc@1: 89.0625 (83.2711)  Acc@5: 100.0000 (99.3470)LR: 2.121e-02
Train: 13 [ 250/390]  Loss: 0.2826 (0.480)  Acc@1: 92.1875 (83.4848)  Acc@5: 98.4375 (99.3339)LR: 2.121e-02
Train: 13 [ 300/390]  Loss: 0.3561 (0.477)  Acc@1: 85.9375 (83.5185)  Acc@5: 100.0000 (99.3252)LR: 2.121e-02
Train: 13 [ 350/390]  Loss: 0.3880 (0.472)  Acc@1: 84.3750 (83.6271)  Acc@5: 100.0000 (99.3545)LR: 2.121e-02
Train: 13 [ 390/390]  Loss: 0.4279 (0.471)  Acc@1: 85.0000 (83.6120)  Acc@5: 100.0000 (99.3440)LR: 2.121e-02
train_acc 83.612000
Valid: 13 [   0/390]  Loss: 0.5529 (0.553)  Acc@1: 81.2500 (81.2500)  Acc@5: 100.0000 (100.0000)
Valid: 13 [  50/390]  Loss: 0.6364 (0.593)  Acc@1: 78.1250 (80.1777)  Acc@5: 100.0000 (98.7745)
Valid: 13 [ 100/390]  Loss: 0.7104 (0.563)  Acc@1: 81.2500 (81.2500)  Acc@5: 98.4375 (98.9635)
Valid: 13 [ 150/390]  Loss: 0.4308 (0.554)  Acc@1: 89.0625 (81.4570)  Acc@5: 100.0000 (98.9756)
Valid: 13 [ 200/390]  Loss: 0.4865 (0.555)  Acc@1: 82.8125 (81.5765)  Acc@5: 96.8750 (98.9428)
Valid: 13 [ 250/390]  Loss: 0.4639 (0.554)  Acc@1: 79.6875 (81.3558)  Acc@5: 100.0000 (98.9791)
Valid: 13 [ 300/390]  Loss: 0.8065 (0.556)  Acc@1: 71.8750 (81.2240)  Acc@5: 96.8750 (99.0241)
Valid: 13 [ 350/390]  Loss: 0.3426 (0.558)  Acc@1: 89.0625 (81.2856)  Acc@5: 100.0000 (99.0340)
Valid: 13 [ 390/390]  Loss: 0.5509 (0.556)  Acc@1: 82.5000 (81.2960)  Acc@5: 100.0000 (99.0640)
valid_acc 81.296000
epoch = 13   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_3x3', 2), ('sep_conv_5x5', 0), ('sep_conv_3x3', 2), ('dil_conv_3x3', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('sep_conv_5x5', 2), ('dil_conv_3x3', 3), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1333, 0.1117, 0.0852, 0.1078, 0.1785, 0.1337, 0.1223, 0.1275],
        [0.1485, 0.1009, 0.0840, 0.1070, 0.1385, 0.1466, 0.1330, 0.1416],
        [0.1388, 0.1071, 0.0845, 0.1033, 0.1601, 0.1593, 0.1306, 0.1164],
        [0.1613, 0.0987, 0.0855, 0.1059, 0.1333, 0.1304, 0.1533, 0.1316],
        [0.1536, 0.0875, 0.0767, 0.1006, 0.1441, 0.1515, 0.1416, 0.1444],
        [0.1466, 0.1046, 0.0796, 0.0951, 0.1456, 0.1493, 0.1374, 0.1416],
        [0.1588, 0.0990, 0.0811, 0.1022, 0.1436, 0.1471, 0.1295, 0.1388],
        [0.1810, 0.0831, 0.0700, 0.0948, 0.1432, 0.1438, 0.1497, 0.1343],
        [0.1876, 0.0811, 0.0710, 0.0885, 0.1481, 0.1454, 0.1315, 0.1468],
        [0.1706, 0.1056, 0.0842, 0.1023, 0.1381, 0.1348, 0.1295, 0.1350],
        [0.1720, 0.0943, 0.0801, 0.0976, 0.1506, 0.1337, 0.1407, 0.1309],
        [0.1892, 0.0822, 0.0711, 0.0959, 0.1527, 0.1367, 0.1376, 0.1346],
        [0.1877, 0.0826, 0.0712, 0.0873, 0.1494, 0.1363, 0.1382, 0.1473],
        [0.1960, 0.0778, 0.0703, 0.0823, 0.1412, 0.1420, 0.1513, 0.1391]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1217, 0.1391, 0.1199, 0.1202, 0.1233, 0.1263, 0.1192, 0.1303],
        [0.1238, 0.1237, 0.1077, 0.1331, 0.1269, 0.1384, 0.1252, 0.1212],
        [0.1152, 0.1434, 0.1275, 0.1207, 0.1294, 0.1208, 0.1178, 0.1251],
        [0.1263, 0.1247, 0.1115, 0.1226, 0.1271, 0.1354, 0.1315, 0.1210],
        [0.1268, 0.1129, 0.0951, 0.1237, 0.1333, 0.1239, 0.1397, 0.1446],
        [0.1210, 0.1342, 0.1254, 0.1124, 0.1308, 0.1215, 0.1222, 0.1324],
        [0.1283, 0.1240, 0.1150, 0.1183, 0.1337, 0.1337, 0.1250, 0.1220],
        [0.1239, 0.1082, 0.0944, 0.1247, 0.1374, 0.1436, 0.1320, 0.1359],
        [0.1364, 0.1035, 0.0973, 0.1263, 0.1223, 0.1382, 0.1422, 0.1336],
        [0.1133, 0.1348, 0.1210, 0.1248, 0.1321, 0.1290, 0.1297, 0.1153],
        [0.1233, 0.1247, 0.1134, 0.1256, 0.1354, 0.1300, 0.1203, 0.1274],
        [0.1302, 0.1098, 0.0938, 0.1286, 0.1342, 0.1415, 0.1295, 0.1323],
        [0.1325, 0.1006, 0.0903, 0.1176, 0.1246, 0.1509, 0.1414, 0.1421],
        [0.1514, 0.1006, 0.0883, 0.1151, 0.1294, 0.1588, 0.1188, 0.1375]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 14 [   0/390]  Loss: 0.4302 (0.430)  Acc@1: 81.2500 (81.2500)  Acc@5: 100.0000 (100.0000)LR: 2.065e-02
Train: 14 [  50/390]  Loss: 0.7152 (0.446)  Acc@1: 68.7500 (84.7120)  Acc@5: 98.4375 (99.3566)LR: 2.065e-02
Train: 14 [ 100/390]  Loss: 0.3327 (0.434)  Acc@1: 85.9375 (85.1330)  Acc@5: 100.0000 (99.3967)LR: 2.065e-02
Train: 14 [ 150/390]  Loss: 0.3943 (0.444)  Acc@1: 85.9375 (84.7475)  Acc@5: 100.0000 (99.3274)LR: 2.065e-02
Train: 14 [ 200/390]  Loss: 0.4314 (0.454)  Acc@1: 85.9375 (84.3672)  Acc@5: 100.0000 (99.3004)LR: 2.065e-02
Train: 14 [ 250/390]  Loss: 0.6109 (0.459)  Acc@1: 81.2500 (84.2878)  Acc@5: 98.4375 (99.3277)LR: 2.065e-02
Train: 14 [ 300/390]  Loss: 0.6068 (0.459)  Acc@1: 78.1250 (84.2660)  Acc@5: 100.0000 (99.3304)LR: 2.065e-02
Train: 14 [ 350/390]  Loss: 0.5000 (0.455)  Acc@1: 82.8125 (84.3260)  Acc@5: 100.0000 (99.3501)LR: 2.065e-02
Train: 14 [ 390/390]  Loss: 0.4335 (0.456)  Acc@1: 85.0000 (84.2360)  Acc@5: 97.5000 (99.3440)LR: 2.065e-02
train_acc 84.236000
Valid: 14 [   0/390]  Loss: 0.6367 (0.637)  Acc@1: 81.2500 (81.2500)  Acc@5: 98.4375 (98.4375)
Valid: 14 [  50/390]  Loss: 0.5740 (0.559)  Acc@1: 79.6875 (82.0466)  Acc@5: 96.8750 (98.8051)
Valid: 14 [ 100/390]  Loss: 0.4261 (0.532)  Acc@1: 84.3750 (82.6269)  Acc@5: 100.0000 (99.0254)
Valid: 14 [ 150/390]  Loss: 0.6170 (0.537)  Acc@1: 81.2500 (82.4089)  Acc@5: 96.8750 (99.0791)
Valid: 14 [ 200/390]  Loss: 0.6613 (0.529)  Acc@1: 79.6875 (82.3850)  Acc@5: 98.4375 (99.0283)
Valid: 14 [ 250/390]  Loss: 0.4485 (0.531)  Acc@1: 87.5000 (82.4265)  Acc@5: 100.0000 (98.9915)
Valid: 14 [ 300/390]  Loss: 0.4828 (0.522)  Acc@1: 84.3750 (82.6516)  Acc@5: 98.4375 (99.0033)
Valid: 14 [ 350/390]  Loss: 0.4705 (0.519)  Acc@1: 85.9375 (82.7591)  Acc@5: 100.0000 (99.0340)
Valid: 14 [ 390/390]  Loss: 0.1881 (0.517)  Acc@1: 95.0000 (82.8400)  Acc@5: 100.0000 (99.0320)
valid_acc 82.840000
epoch = 14   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_3x3', 2), ('sep_conv_5x5', 3), ('sep_conv_3x3', 2), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1348, 0.1091, 0.0824, 0.1061, 0.1832, 0.1354, 0.1208, 0.1283],
        [0.1509, 0.0986, 0.0817, 0.1058, 0.1393, 0.1475, 0.1336, 0.1425],
        [0.1413, 0.1049, 0.0823, 0.1021, 0.1635, 0.1605, 0.1303, 0.1152],
        [0.1649, 0.0963, 0.0833, 0.1044, 0.1329, 0.1320, 0.1545, 0.1317],
        [0.1563, 0.0844, 0.0741, 0.0991, 0.1447, 0.1530, 0.1436, 0.1447],
        [0.1503, 0.1023, 0.0774, 0.0939, 0.1478, 0.1484, 0.1388, 0.1413],
        [0.1627, 0.0969, 0.0790, 0.1010, 0.1439, 0.1479, 0.1295, 0.1390],
        [0.1877, 0.0801, 0.0674, 0.0935, 0.1431, 0.1455, 0.1495, 0.1331],
        [0.1944, 0.0777, 0.0679, 0.0860, 0.1469, 0.1487, 0.1316, 0.1467],
        [0.1775, 0.1028, 0.0818, 0.1010, 0.1375, 0.1341, 0.1303, 0.1350],
        [0.1781, 0.0915, 0.0775, 0.0956, 0.1528, 0.1329, 0.1416, 0.1300],
        [0.1981, 0.0784, 0.0679, 0.0937, 0.1535, 0.1358, 0.1380, 0.1348],
        [0.1957, 0.0786, 0.0678, 0.0844, 0.1504, 0.1364, 0.1380, 0.1487],
        [0.2054, 0.0743, 0.0671, 0.0792, 0.1409, 0.1420, 0.1520, 0.1391]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1208, 0.1395, 0.1193, 0.1202, 0.1223, 0.1267, 0.1204, 0.1307],
        [0.1240, 0.1238, 0.1071, 0.1333, 0.1268, 0.1381, 0.1265, 0.1204],
        [0.1142, 0.1446, 0.1277, 0.1210, 0.1292, 0.1193, 0.1182, 0.1259],
        [0.1261, 0.1251, 0.1115, 0.1215, 0.1268, 0.1374, 0.1316, 0.1201],
        [0.1274, 0.1114, 0.0934, 0.1244, 0.1327, 0.1245, 0.1408, 0.1453],
        [0.1196, 0.1354, 0.1263, 0.1126, 0.1323, 0.1206, 0.1214, 0.1315],
        [0.1282, 0.1248, 0.1161, 0.1184, 0.1335, 0.1336, 0.1258, 0.1197],
        [0.1247, 0.1071, 0.0936, 0.1264, 0.1387, 0.1433, 0.1309, 0.1354],
        [0.1362, 0.1013, 0.0959, 0.1263, 0.1217, 0.1391, 0.1452, 0.1344],
        [0.1125, 0.1354, 0.1212, 0.1260, 0.1321, 0.1303, 0.1280, 0.1145],
        [0.1234, 0.1243, 0.1131, 0.1260, 0.1382, 0.1291, 0.1199, 0.1260],
        [0.1319, 0.1086, 0.0926, 0.1306, 0.1338, 0.1408, 0.1288, 0.1328],
        [0.1337, 0.0985, 0.0890, 0.1179, 0.1237, 0.1518, 0.1423, 0.1431],
        [0.1529, 0.0985, 0.0863, 0.1146, 0.1287, 0.1596, 0.1192, 0.1403]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 15 [   0/390]  Loss: 0.3264 (0.326)  Acc@1: 89.0625 (89.0625)  Acc@5: 98.4375 (98.4375)LR: 2.005e-02
Train: 15 [  50/390]  Loss: 0.5207 (0.427)  Acc@1: 81.2500 (85.3554)  Acc@5: 98.4375 (99.2647)LR: 2.005e-02
Train: 15 [ 100/390]  Loss: 0.5605 (0.441)  Acc@1: 85.9375 (84.8082)  Acc@5: 100.0000 (99.3657)LR: 2.005e-02
Train: 15 [ 150/390]  Loss: 0.4540 (0.443)  Acc@1: 82.8125 (84.5095)  Acc@5: 100.0000 (99.3895)LR: 2.005e-02
Train: 15 [ 200/390]  Loss: 0.3457 (0.443)  Acc@1: 84.3750 (84.4838)  Acc@5: 100.0000 (99.4325)LR: 2.005e-02
Train: 15 [ 250/390]  Loss: 0.6164 (0.446)  Acc@1: 84.3750 (84.4871)  Acc@5: 95.3125 (99.3713)LR: 2.005e-02
Train: 15 [ 300/390]  Loss: 0.4462 (0.452)  Acc@1: 84.3750 (84.3958)  Acc@5: 100.0000 (99.3719)LR: 2.005e-02
Train: 15 [ 350/390]  Loss: 0.2617 (0.451)  Acc@1: 93.7500 (84.4729)  Acc@5: 100.0000 (99.3812)LR: 2.005e-02
Train: 15 [ 390/390]  Loss: 0.3383 (0.449)  Acc@1: 90.0000 (84.6160)  Acc@5: 100.0000 (99.3800)LR: 2.005e-02
train_acc 84.616000
Valid: 15 [   0/390]  Loss: 0.8173 (0.817)  Acc@1: 71.8750 (71.8750)  Acc@5: 100.0000 (100.0000)
Valid: 15 [  50/390]  Loss: 0.4295 (0.481)  Acc@1: 81.2500 (83.6091)  Acc@5: 100.0000 (98.9277)
Valid: 15 [ 100/390]  Loss: 0.5149 (0.477)  Acc@1: 81.2500 (83.8954)  Acc@5: 98.4375 (99.1182)
Valid: 15 [ 150/390]  Loss: 0.6129 (0.481)  Acc@1: 73.4375 (83.7438)  Acc@5: 100.0000 (99.1929)
Valid: 15 [ 200/390]  Loss: 0.6509 (0.489)  Acc@1: 78.1250 (83.4655)  Acc@5: 96.8750 (99.1371)
Valid: 15 [ 250/390]  Loss: 0.3286 (0.495)  Acc@1: 89.0625 (83.2420)  Acc@5: 100.0000 (99.1534)
Valid: 15 [ 300/390]  Loss: 0.6812 (0.504)  Acc@1: 81.2500 (83.1084)  Acc@5: 98.4375 (99.0968)
Valid: 15 [ 350/390]  Loss: 0.4579 (0.500)  Acc@1: 84.3750 (83.1864)  Acc@5: 100.0000 (99.1097)
Valid: 15 [ 390/390]  Loss: 0.4608 (0.501)  Acc@1: 87.5000 (83.2280)  Acc@5: 100.0000 (99.1160)
valid_acc 83.228000
epoch = 15   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_5x5', 3), ('dil_conv_3x3', 2), ('sep_conv_3x3', 1), ('sep_conv_3x3', 2)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1360, 0.1072, 0.0806, 0.1053, 0.1865, 0.1352, 0.1200, 0.1291],
        [0.1532, 0.0956, 0.0789, 0.1036, 0.1396, 0.1498, 0.1346, 0.1446],
        [0.1429, 0.1025, 0.0809, 0.1017, 0.1646, 0.1624, 0.1309, 0.1142],
        [0.1694, 0.0933, 0.0807, 0.1023, 0.1326, 0.1328, 0.1558, 0.1330],
        [0.1581, 0.0808, 0.0713, 0.0969, 0.1454, 0.1563, 0.1451, 0.1459],
        [0.1544, 0.0997, 0.0759, 0.0930, 0.1472, 0.1477, 0.1389, 0.1432],
        [0.1669, 0.0944, 0.0767, 0.0990, 0.1437, 0.1491, 0.1305, 0.1397],
        [0.1931, 0.0767, 0.0648, 0.0915, 0.1440, 0.1479, 0.1495, 0.1326],
        [0.2017, 0.0746, 0.0654, 0.0841, 0.1450, 0.1502, 0.1314, 0.1476],
        [0.1832, 0.1003, 0.0804, 0.1005, 0.1355, 0.1327, 0.1320, 0.1355],
        [0.1848, 0.0882, 0.0750, 0.0934, 0.1542, 0.1329, 0.1416, 0.1298],
        [0.2073, 0.0747, 0.0651, 0.0917, 0.1533, 0.1362, 0.1373, 0.1344],
        [0.2026, 0.0750, 0.0651, 0.0821, 0.1514, 0.1352, 0.1386, 0.1499],
        [0.2147, 0.0707, 0.0638, 0.0761, 0.1407, 0.1409, 0.1531, 0.1401]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1212, 0.1387, 0.1186, 0.1218, 0.1223, 0.1262, 0.1212, 0.1300],
        [0.1236, 0.1234, 0.1065, 0.1337, 0.1279, 0.1376, 0.1278, 0.1194],
        [0.1142, 0.1447, 0.1275, 0.1205, 0.1292, 0.1203, 0.1168, 0.1268],
        [0.1263, 0.1251, 0.1115, 0.1215, 0.1267, 0.1373, 0.1314, 0.1202],
        [0.1268, 0.1106, 0.0915, 0.1237, 0.1332, 0.1248, 0.1416, 0.1478],
        [0.1198, 0.1346, 0.1254, 0.1140, 0.1313, 0.1220, 0.1207, 0.1323],
        [0.1281, 0.1248, 0.1160, 0.1190, 0.1322, 0.1332, 0.1268, 0.1200],
        [0.1241, 0.1063, 0.0917, 0.1254, 0.1386, 0.1468, 0.1311, 0.1361],
        [0.1362, 0.0994, 0.0938, 0.1258, 0.1211, 0.1408, 0.1476, 0.1353],
        [0.1128, 0.1354, 0.1208, 0.1277, 0.1315, 0.1322, 0.1268, 0.1129],
        [0.1223, 0.1246, 0.1128, 0.1281, 0.1380, 0.1296, 0.1193, 0.1253],
        [0.1322, 0.1075, 0.0904, 0.1306, 0.1342, 0.1410, 0.1295, 0.1347],
        [0.1344, 0.0971, 0.0870, 0.1180, 0.1229, 0.1534, 0.1429, 0.1443],
        [0.1541, 0.0972, 0.0849, 0.1149, 0.1286, 0.1587, 0.1191, 0.1426]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 16 [   0/390]  Loss: 0.2994 (0.299)  Acc@1: 89.0625 (89.0625)  Acc@5: 100.0000 (100.0000)LR: 1.943e-02
Train: 16 [  50/390]  Loss: 0.4421 (0.401)  Acc@1: 82.8125 (86.2132)  Acc@5: 100.0000 (99.4485)LR: 1.943e-02
Train: 16 [ 100/390]  Loss: 0.5812 (0.423)  Acc@1: 81.2500 (85.4425)  Acc@5: 100.0000 (99.3967)LR: 1.943e-02
Train: 16 [ 150/390]  Loss: 0.5631 (0.426)  Acc@1: 76.5625 (85.0269)  Acc@5: 98.4375 (99.4205)LR: 1.943e-02
Train: 16 [ 200/390]  Loss: 0.3803 (0.420)  Acc@1: 87.5000 (85.2223)  Acc@5: 100.0000 (99.4170)LR: 1.943e-02
Train: 16 [ 250/390]  Loss: 0.3361 (0.423)  Acc@1: 90.6250 (85.1282)  Acc@5: 100.0000 (99.4397)LR: 1.943e-02
Train: 16 [ 300/390]  Loss: 0.2522 (0.421)  Acc@1: 90.6250 (85.2990)  Acc@5: 100.0000 (99.4549)LR: 1.943e-02
Train: 16 [ 350/390]  Loss: 0.3735 (0.421)  Acc@1: 85.9375 (85.2520)  Acc@5: 100.0000 (99.4569)LR: 1.943e-02
Train: 16 [ 390/390]  Loss: 0.7540 (0.426)  Acc@1: 75.0000 (85.0840)  Acc@5: 95.0000 (99.4320)LR: 1.943e-02
train_acc 85.084000
Valid: 16 [   0/390]  Loss: 0.4826 (0.483)  Acc@1: 82.8125 (82.8125)  Acc@5: 100.0000 (100.0000)
Valid: 16 [  50/390]  Loss: 0.7056 (0.482)  Acc@1: 84.3750 (83.8542)  Acc@5: 100.0000 (99.1728)
Valid: 16 [ 100/390]  Loss: 0.5198 (0.489)  Acc@1: 84.3750 (83.9728)  Acc@5: 95.3125 (99.1182)
Valid: 16 [ 150/390]  Loss: 0.4278 (0.490)  Acc@1: 89.0625 (83.8680)  Acc@5: 100.0000 (99.1101)
Valid: 16 [ 200/390]  Loss: 0.5577 (0.484)  Acc@1: 87.5000 (84.0252)  Acc@5: 96.8750 (99.1138)
Valid: 16 [ 250/390]  Loss: 0.7732 (0.478)  Acc@1: 71.8750 (84.1758)  Acc@5: 100.0000 (99.1658)
Valid: 16 [ 300/390]  Loss: 0.5669 (0.479)  Acc@1: 79.6875 (84.1933)  Acc@5: 98.4375 (99.2006)
Valid: 16 [ 350/390]  Loss: 0.5929 (0.479)  Acc@1: 76.5625 (84.0812)  Acc@5: 100.0000 (99.2210)
Valid: 16 [ 390/390]  Loss: 0.3724 (0.477)  Acc@1: 82.5000 (84.0320)  Acc@5: 100.0000 (99.2280)
valid_acc 84.032000
epoch = 16   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('dil_conv_3x3', 1), ('sep_conv_5x5', 1), ('sep_conv_5x5', 3), ('sep_conv_3x3', 1), ('sep_conv_3x3', 3)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1360, 0.1049, 0.0791, 0.1052, 0.1909, 0.1357, 0.1194, 0.1288],
        [0.1553, 0.0934, 0.0774, 0.1030, 0.1395, 0.1514, 0.1354, 0.1447],
        [0.1438, 0.0997, 0.0796, 0.1017, 0.1661, 0.1646, 0.1308, 0.1135],
        [0.1743, 0.0904, 0.0784, 0.1009, 0.1316, 0.1320, 0.1583, 0.1342],
        [0.1616, 0.0781, 0.0693, 0.0962, 0.1460, 0.1570, 0.1441, 0.1476],
        [0.1576, 0.0969, 0.0746, 0.0930, 0.1476, 0.1475, 0.1393, 0.1434],
        [0.1721, 0.0912, 0.0744, 0.0975, 0.1443, 0.1507, 0.1308, 0.1390],
        [0.2018, 0.0735, 0.0623, 0.0906, 0.1451, 0.1483, 0.1474, 0.1310],
        [0.2093, 0.0711, 0.0627, 0.0818, 0.1454, 0.1495, 0.1334, 0.1467],
        [0.1869, 0.0976, 0.0792, 0.1006, 0.1353, 0.1316, 0.1327, 0.1361],
        [0.1914, 0.0852, 0.0729, 0.0921, 0.1542, 0.1327, 0.1415, 0.1299],
        [0.2179, 0.0716, 0.0628, 0.0909, 0.1530, 0.1349, 0.1360, 0.1330],
        [0.2103, 0.0717, 0.0626, 0.0800, 0.1533, 0.1335, 0.1384, 0.1503],
        [0.2256, 0.0673, 0.0611, 0.0737, 0.1404, 0.1392, 0.1529, 0.1399]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1198, 0.1407, 0.1207, 0.1218, 0.1237, 0.1240, 0.1198, 0.1294],
        [0.1242, 0.1238, 0.1061, 0.1334, 0.1284, 0.1374, 0.1274, 0.1193],
        [0.1124, 0.1467, 0.1300, 0.1206, 0.1284, 0.1209, 0.1154, 0.1257],
        [0.1261, 0.1249, 0.1113, 0.1213, 0.1264, 0.1392, 0.1322, 0.1187],
        [0.1270, 0.1094, 0.0910, 0.1243, 0.1329, 0.1251, 0.1406, 0.1497],
        [0.1180, 0.1354, 0.1277, 0.1139, 0.1318, 0.1214, 0.1208, 0.1310],
        [0.1284, 0.1254, 0.1165, 0.1192, 0.1330, 0.1319, 0.1265, 0.1191],
        [0.1251, 0.1048, 0.0918, 0.1270, 0.1387, 0.1460, 0.1321, 0.1345],
        [0.1366, 0.0978, 0.0940, 0.1267, 0.1203, 0.1407, 0.1496, 0.1343],
        [0.1106, 0.1359, 0.1223, 0.1277, 0.1320, 0.1325, 0.1264, 0.1125],
        [0.1225, 0.1237, 0.1121, 0.1283, 0.1395, 0.1286, 0.1193, 0.1261],
        [0.1337, 0.1057, 0.0899, 0.1323, 0.1344, 0.1402, 0.1286, 0.1353],
        [0.1368, 0.0946, 0.0863, 0.1182, 0.1227, 0.1537, 0.1442, 0.1435],
        [0.1584, 0.0948, 0.0834, 0.1141, 0.1290, 0.1580, 0.1200, 0.1424]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 17 [   0/390]  Loss: 0.2393 (0.239)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)LR: 1.878e-02
Train: 17 [  50/390]  Loss: 0.3473 (0.377)  Acc@1: 85.9375 (87.0098)  Acc@5: 98.4375 (99.5404)LR: 1.878e-02
Train: 17 [ 100/390]  Loss: 0.3911 (0.394)  Acc@1: 82.8125 (86.3397)  Acc@5: 100.0000 (99.4740)LR: 1.878e-02
Train: 17 [ 150/390]  Loss: 0.2889 (0.407)  Acc@1: 87.5000 (85.8754)  Acc@5: 100.0000 (99.4619)LR: 1.878e-02
Train: 17 [ 200/390]  Loss: 0.4256 (0.415)  Acc@1: 82.8125 (85.2923)  Acc@5: 100.0000 (99.4947)LR: 1.878e-02
Train: 17 [ 250/390]  Loss: 0.4746 (0.415)  Acc@1: 81.2500 (85.4457)  Acc@5: 98.4375 (99.4086)LR: 1.878e-02
Train: 17 [ 300/390]  Loss: 0.4406 (0.416)  Acc@1: 84.3750 (85.4028)  Acc@5: 100.0000 (99.3978)LR: 1.878e-02
Train: 17 [ 350/390]  Loss: 0.3324 (0.415)  Acc@1: 89.0625 (85.4389)  Acc@5: 100.0000 (99.3946)LR: 1.878e-02
Train: 17 [ 390/390]  Loss: 0.4110 (0.417)  Acc@1: 82.5000 (85.3560)  Acc@5: 100.0000 (99.4120)LR: 1.878e-02
train_acc 85.356000
Valid: 17 [   0/390]  Loss: 0.5798 (0.580)  Acc@1: 78.1250 (78.1250)  Acc@5: 98.4375 (98.4375)
Valid: 17 [  50/390]  Loss: 0.2985 (0.541)  Acc@1: 90.6250 (81.7096)  Acc@5: 100.0000 (98.9583)
Valid: 17 [ 100/390]  Loss: 0.4898 (0.521)  Acc@1: 82.8125 (82.2246)  Acc@5: 100.0000 (99.0408)
Valid: 17 [ 150/390]  Loss: 0.5552 (0.518)  Acc@1: 82.8125 (82.5952)  Acc@5: 100.0000 (99.0377)
Valid: 17 [ 200/390]  Loss: 0.4469 (0.517)  Acc@1: 82.8125 (82.5560)  Acc@5: 100.0000 (99.1060)
Valid: 17 [ 250/390]  Loss: 0.6739 (0.515)  Acc@1: 78.1250 (82.5324)  Acc@5: 98.4375 (99.0911)
Valid: 17 [ 300/390]  Loss: 0.4591 (0.513)  Acc@1: 81.2500 (82.6360)  Acc@5: 98.4375 (99.1175)
Valid: 17 [ 350/390]  Loss: 0.6949 (0.519)  Acc@1: 73.4375 (82.5588)  Acc@5: 98.4375 (99.1052)
Valid: 17 [ 390/390]  Loss: 0.7821 (0.516)  Acc@1: 75.0000 (82.5440)  Acc@5: 100.0000 (99.1400)
valid_acc 82.544000
epoch = 17   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_5x5', 1), ('sep_conv_5x5', 3), ('sep_conv_3x3', 1), ('sep_conv_3x3', 2)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1366, 0.1029, 0.0778, 0.1055, 0.1943, 0.1365, 0.1186, 0.1278],
        [0.1583, 0.0907, 0.0751, 0.1013, 0.1397, 0.1525, 0.1364, 0.1462],
        [0.1440, 0.0973, 0.0782, 0.1016, 0.1690, 0.1651, 0.1313, 0.1135],
        [0.1810, 0.0877, 0.0765, 0.0998, 0.1309, 0.1307, 0.1585, 0.1350],
        [0.1656, 0.0753, 0.0674, 0.0953, 0.1449, 0.1587, 0.1452, 0.1475],
        [0.1595, 0.0948, 0.0733, 0.0930, 0.1483, 0.1468, 0.1397, 0.1446],
        [0.1764, 0.0883, 0.0722, 0.0957, 0.1456, 0.1510, 0.1315, 0.1393],
        [0.2096, 0.0711, 0.0605, 0.0897, 0.1449, 0.1474, 0.1455, 0.1313],
        [0.2169, 0.0679, 0.0602, 0.0795, 0.1443, 0.1501, 0.1343, 0.1468],
        [0.1926, 0.0954, 0.0778, 0.1002, 0.1330, 0.1323, 0.1342, 0.1346],
        [0.1986, 0.0825, 0.0708, 0.0906, 0.1542, 0.1317, 0.1410, 0.1305],
        [0.2279, 0.0688, 0.0607, 0.0895, 0.1535, 0.1317, 0.1350, 0.1329],
        [0.2189, 0.0687, 0.0603, 0.0780, 0.1526, 0.1329, 0.1386, 0.1501],
        [0.2352, 0.0648, 0.0589, 0.0716, 0.1400, 0.1377, 0.1526, 0.1393]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1204, 0.1401, 0.1208, 0.1224, 0.1245, 0.1224, 0.1199, 0.1294],
        [0.1229, 0.1229, 0.1055, 0.1344, 0.1295, 0.1379, 0.1275, 0.1195],
        [0.1118, 0.1464, 0.1303, 0.1213, 0.1284, 0.1206, 0.1165, 0.1248],
        [0.1252, 0.1242, 0.1117, 0.1212, 0.1257, 0.1391, 0.1341, 0.1187],
        [0.1270, 0.1071, 0.0901, 0.1252, 0.1329, 0.1271, 0.1395, 0.1512],
        [0.1182, 0.1350, 0.1282, 0.1154, 0.1324, 0.1201, 0.1203, 0.1304],
        [0.1279, 0.1253, 0.1172, 0.1198, 0.1330, 0.1325, 0.1254, 0.1190],
        [0.1257, 0.1023, 0.0908, 0.1277, 0.1409, 0.1472, 0.1311, 0.1342],
        [0.1363, 0.0954, 0.0930, 0.1274, 0.1212, 0.1406, 0.1509, 0.1352],
        [0.1097, 0.1351, 0.1229, 0.1295, 0.1321, 0.1309, 0.1272, 0.1126],
        [0.1214, 0.1230, 0.1125, 0.1300, 0.1405, 0.1279, 0.1193, 0.1254],
        [0.1345, 0.1029, 0.0890, 0.1339, 0.1342, 0.1397, 0.1294, 0.1363],
        [0.1378, 0.0921, 0.0855, 0.1198, 0.1220, 0.1532, 0.1455, 0.1441],
        [0.1615, 0.0924, 0.0826, 0.1147, 0.1293, 0.1558, 0.1204, 0.1433]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 18 [   0/390]  Loss: 0.2966 (0.297)  Acc@1: 85.9375 (85.9375)  Acc@5: 100.0000 (100.0000)LR: 1.811e-02
Train: 18 [  50/390]  Loss: 0.3923 (0.397)  Acc@1: 85.9375 (85.8456)  Acc@5: 98.4375 (99.6630)LR: 1.811e-02
Train: 18 [ 100/390]  Loss: 0.4416 (0.397)  Acc@1: 82.8125 (85.9375)  Acc@5: 96.8750 (99.5359)LR: 1.811e-02
Train: 18 [ 150/390]  Loss: 0.2560 (0.395)  Acc@1: 87.5000 (86.0513)  Acc@5: 100.0000 (99.5240)LR: 1.811e-02
Train: 18 [ 200/390]  Loss: 0.5093 (0.395)  Acc@1: 79.6875 (86.2018)  Acc@5: 100.0000 (99.5180)LR: 1.811e-02
Train: 18 [ 250/390]  Loss: 0.4125 (0.403)  Acc@1: 84.3750 (85.8628)  Acc@5: 100.0000 (99.5144)LR: 1.811e-02
Train: 18 [ 300/390]  Loss: 0.2615 (0.404)  Acc@1: 89.0625 (85.8389)  Acc@5: 100.0000 (99.5069)LR: 1.811e-02
Train: 18 [ 350/390]  Loss: 0.4366 (0.408)  Acc@1: 87.5000 (85.7283)  Acc@5: 98.4375 (99.4881)LR: 1.811e-02
Train: 18 [ 390/390]  Loss: 0.8765 (0.409)  Acc@1: 70.0000 (85.6400)  Acc@5: 100.0000 (99.4760)LR: 1.811e-02
train_acc 85.640000
Valid: 18 [   0/390]  Loss: 0.4987 (0.499)  Acc@1: 82.8125 (82.8125)  Acc@5: 96.8750 (96.8750)
Valid: 18 [  50/390]  Loss: 0.5067 (0.520)  Acc@1: 85.9375 (81.8934)  Acc@5: 98.4375 (99.1728)
Valid: 18 [ 100/390]  Loss: 0.5132 (0.524)  Acc@1: 85.9375 (81.9771)  Acc@5: 96.8750 (99.1337)
Valid: 18 [ 150/390]  Loss: 0.7476 (0.515)  Acc@1: 79.6875 (82.3986)  Acc@5: 100.0000 (99.1722)
Valid: 18 [ 200/390]  Loss: 0.4984 (0.517)  Acc@1: 84.3750 (82.4238)  Acc@5: 100.0000 (99.2149)
Valid: 18 [ 250/390]  Loss: 0.5321 (0.516)  Acc@1: 79.6875 (82.3456)  Acc@5: 100.0000 (99.2094)
Valid: 18 [ 300/390]  Loss: 0.5721 (0.511)  Acc@1: 81.2500 (82.6723)  Acc@5: 98.4375 (99.2421)
Valid: 18 [ 350/390]  Loss: 0.7833 (0.516)  Acc@1: 70.3125 (82.5766)  Acc@5: 100.0000 (99.2165)
Valid: 18 [ 390/390]  Loss: 0.3803 (0.515)  Acc@1: 85.0000 (82.6160)  Acc@5: 100.0000 (99.1760)
valid_acc 82.616000
epoch = 18   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('dil_conv_3x3', 1), ('sep_conv_5x5', 1), ('sep_conv_5x5', 2), ('sep_conv_3x3', 1), ('dil_conv_3x3', 4)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1372, 0.1010, 0.0760, 0.1048, 0.1972, 0.1371, 0.1187, 0.1279],
        [0.1612, 0.0890, 0.0732, 0.0998, 0.1408, 0.1524, 0.1371, 0.1464],
        [0.1438, 0.0953, 0.0763, 0.1006, 0.1731, 0.1673, 0.1314, 0.1121],
        [0.1868, 0.0857, 0.0741, 0.0980, 0.1316, 0.1298, 0.1586, 0.1355],
        [0.1706, 0.0726, 0.0651, 0.0937, 0.1444, 0.1584, 0.1457, 0.1495],
        [0.1621, 0.0926, 0.0710, 0.0916, 0.1494, 0.1483, 0.1397, 0.1452],
        [0.1821, 0.0855, 0.0694, 0.0931, 0.1462, 0.1515, 0.1317, 0.1406],
        [0.2150, 0.0678, 0.0577, 0.0868, 0.1445, 0.1507, 0.1451, 0.1324],
        [0.2264, 0.0648, 0.0575, 0.0771, 0.1432, 0.1486, 0.1349, 0.1475],
        [0.1974, 0.0933, 0.0759, 0.0993, 0.1319, 0.1324, 0.1348, 0.1350],
        [0.2072, 0.0802, 0.0686, 0.0889, 0.1556, 0.1303, 0.1398, 0.1295],
        [0.2373, 0.0656, 0.0580, 0.0869, 0.1529, 0.1319, 0.1350, 0.1324],
        [0.2289, 0.0658, 0.0580, 0.0762, 0.1518, 0.1314, 0.1373, 0.1507],
        [0.2444, 0.0621, 0.0564, 0.0691, 0.1379, 0.1373, 0.1533, 0.1395]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1201, 0.1395, 0.1205, 0.1241, 0.1247, 0.1221, 0.1196, 0.1294],
        [0.1226, 0.1224, 0.1047, 0.1350, 0.1303, 0.1375, 0.1285, 0.1191],
        [0.1113, 0.1455, 0.1298, 0.1217, 0.1287, 0.1211, 0.1171, 0.1248],
        [0.1257, 0.1239, 0.1114, 0.1203, 0.1268, 0.1391, 0.1348, 0.1180],
        [0.1271, 0.1056, 0.0888, 0.1257, 0.1334, 0.1265, 0.1400, 0.1530],
        [0.1173, 0.1340, 0.1281, 0.1170, 0.1321, 0.1204, 0.1205, 0.1306],
        [0.1280, 0.1256, 0.1176, 0.1205, 0.1331, 0.1307, 0.1244, 0.1201],
        [0.1269, 0.1005, 0.0896, 0.1285, 0.1401, 0.1489, 0.1306, 0.1348],
        [0.1374, 0.0933, 0.0917, 0.1277, 0.1205, 0.1410, 0.1533, 0.1351],
        [0.1084, 0.1339, 0.1219, 0.1316, 0.1320, 0.1323, 0.1278, 0.1123],
        [0.1206, 0.1215, 0.1115, 0.1324, 0.1416, 0.1281, 0.1197, 0.1245],
        [0.1337, 0.1016, 0.0877, 0.1343, 0.1354, 0.1400, 0.1299, 0.1373],
        [0.1387, 0.0904, 0.0841, 0.1201, 0.1219, 0.1539, 0.1473, 0.1435],
        [0.1633, 0.0911, 0.0817, 0.1151, 0.1296, 0.1545, 0.1205, 0.1443]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 19 [   0/390]  Loss: 0.4926 (0.493)  Acc@1: 85.9375 (85.9375)  Acc@5: 100.0000 (100.0000)LR: 1.742e-02
Train: 19 [  50/390]  Loss: 0.5610 (0.414)  Acc@1: 76.5625 (86.1826)  Acc@5: 100.0000 (99.6936)LR: 1.742e-02
Train: 19 [ 100/390]  Loss: 0.3252 (0.397)  Acc@1: 90.6250 (86.5099)  Acc@5: 98.4375 (99.6442)LR: 1.742e-02
Train: 19 [ 150/390]  Loss: 0.4602 (0.396)  Acc@1: 84.3750 (86.3307)  Acc@5: 100.0000 (99.6275)LR: 1.742e-02
Train: 19 [ 200/390]  Loss: 0.5441 (0.397)  Acc@1: 85.9375 (86.0852)  Acc@5: 98.4375 (99.5725)LR: 1.742e-02
Train: 19 [ 250/390]  Loss: 0.4755 (0.395)  Acc@1: 89.0625 (86.3359)  Acc@5: 100.0000 (99.5829)LR: 1.742e-02
Train: 19 [ 300/390]  Loss: 0.3829 (0.394)  Acc@1: 81.2500 (86.3424)  Acc@5: 100.0000 (99.6003)LR: 1.742e-02
Train: 19 [ 350/390]  Loss: 0.3262 (0.394)  Acc@1: 89.0625 (86.4272)  Acc@5: 100.0000 (99.5637)LR: 1.742e-02
Train: 19 [ 390/390]  Loss: 0.3628 (0.396)  Acc@1: 87.5000 (86.3840)  Acc@5: 100.0000 (99.5800)LR: 1.742e-02
train_acc 86.384000
Valid: 19 [   0/390]  Loss: 0.4174 (0.417)  Acc@1: 85.9375 (85.9375)  Acc@5: 98.4375 (98.4375)
Valid: 19 [  50/390]  Loss: 0.3661 (0.529)  Acc@1: 81.2500 (82.5061)  Acc@5: 100.0000 (99.1422)
Valid: 19 [ 100/390]  Loss: 0.4781 (0.540)  Acc@1: 84.3750 (82.3175)  Acc@5: 100.0000 (99.1027)
Valid: 19 [ 150/390]  Loss: 0.3390 (0.532)  Acc@1: 82.8125 (82.5021)  Acc@5: 100.0000 (99.1101)
Valid: 19 [ 200/390]  Loss: 0.6018 (0.529)  Acc@1: 76.5625 (82.7114)  Acc@5: 100.0000 (99.1449)
Valid: 19 [ 250/390]  Loss: 0.4447 (0.527)  Acc@1: 84.3750 (82.7067)  Acc@5: 100.0000 (99.1783)
Valid: 19 [ 300/390]  Loss: 0.4431 (0.527)  Acc@1: 82.8125 (82.6412)  Acc@5: 100.0000 (99.1694)
Valid: 19 [ 350/390]  Loss: 0.2285 (0.532)  Acc@1: 92.1875 (82.5053)  Acc@5: 100.0000 (99.1498)
Valid: 19 [ 390/390]  Loss: 0.8056 (0.532)  Acc@1: 72.5000 (82.4680)  Acc@5: 97.5000 (99.1600)
valid_acc 82.468000
epoch = 19   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 1), ('sep_conv_3x3', 2)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1365, 0.0992, 0.0745, 0.1045, 0.2028, 0.1364, 0.1186, 0.1275],
        [0.1660, 0.0861, 0.0707, 0.0975, 0.1406, 0.1534, 0.1378, 0.1478],
        [0.1438, 0.0939, 0.0751, 0.1004, 0.1761, 0.1688, 0.1307, 0.1111],
        [0.1946, 0.0829, 0.0718, 0.0964, 0.1303, 0.1296, 0.1590, 0.1354],
        [0.1737, 0.0706, 0.0635, 0.0928, 0.1424, 0.1602, 0.1463, 0.1504],
        [0.1641, 0.0916, 0.0699, 0.0914, 0.1514, 0.1465, 0.1396, 0.1456],
        [0.1893, 0.0831, 0.0671, 0.0915, 0.1464, 0.1501, 0.1310, 0.1414],
        [0.2227, 0.0657, 0.0559, 0.0856, 0.1441, 0.1500, 0.1426, 0.1333],
        [0.2360, 0.0622, 0.0552, 0.0750, 0.1428, 0.1472, 0.1342, 0.1475],
        [0.2030, 0.0927, 0.0750, 0.0999, 0.1293, 0.1316, 0.1348, 0.1338],
        [0.2143, 0.0772, 0.0663, 0.0865, 0.1567, 0.1288, 0.1392, 0.1309],
        [0.2475, 0.0635, 0.0563, 0.0860, 0.1538, 0.1300, 0.1331, 0.1298],
        [0.2391, 0.0633, 0.0560, 0.0745, 0.1511, 0.1303, 0.1359, 0.1499],
        [0.2560, 0.0597, 0.0543, 0.0670, 0.1368, 0.1352, 0.1527, 0.1384]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1190, 0.1397, 0.1206, 0.1251, 0.1253, 0.1210, 0.1204, 0.1291],
        [0.1229, 0.1231, 0.1051, 0.1350, 0.1301, 0.1364, 0.1287, 0.1186],
        [0.1106, 0.1462, 0.1311, 0.1214, 0.1275, 0.1207, 0.1169, 0.1256],
        [0.1261, 0.1251, 0.1122, 0.1198, 0.1269, 0.1389, 0.1336, 0.1173],
        [0.1284, 0.1046, 0.0884, 0.1272, 0.1320, 0.1263, 0.1387, 0.1544],
        [0.1165, 0.1345, 0.1296, 0.1164, 0.1327, 0.1197, 0.1202, 0.1303],
        [0.1276, 0.1267, 0.1185, 0.1194, 0.1335, 0.1306, 0.1239, 0.1199],
        [0.1277, 0.0999, 0.0893, 0.1297, 0.1399, 0.1497, 0.1297, 0.1341],
        [0.1374, 0.0914, 0.0907, 0.1275, 0.1210, 0.1409, 0.1553, 0.1358],
        [0.1071, 0.1330, 0.1223, 0.1336, 0.1316, 0.1336, 0.1278, 0.1110],
        [0.1201, 0.1219, 0.1121, 0.1329, 0.1425, 0.1275, 0.1189, 0.1241],
        [0.1346, 0.0999, 0.0870, 0.1355, 0.1357, 0.1381, 0.1306, 0.1386],
        [0.1409, 0.0881, 0.0830, 0.1204, 0.1222, 0.1531, 0.1476, 0.1446],
        [0.1641, 0.0893, 0.0806, 0.1146, 0.1306, 0.1557, 0.1202, 0.1448]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 20 [   0/390]  Loss: 0.4071 (0.407)  Acc@1: 87.5000 (87.5000)  Acc@5: 100.0000 (100.0000)LR: 1.671e-02
Train: 20 [  50/390]  Loss: 0.4566 (0.385)  Acc@1: 81.2500 (86.4277)  Acc@5: 100.0000 (99.6017)LR: 1.671e-02
Train: 20 [ 100/390]  Loss: 0.3606 (0.384)  Acc@1: 87.5000 (86.4171)  Acc@5: 100.0000 (99.5514)LR: 1.671e-02
Train: 20 [ 150/390]  Loss: 0.4733 (0.379)  Acc@1: 81.2500 (86.5584)  Acc@5: 100.0000 (99.5550)LR: 1.671e-02
Train: 20 [ 200/390]  Loss: 0.2907 (0.383)  Acc@1: 87.5000 (86.4428)  Acc@5: 100.0000 (99.5569)LR: 1.671e-02
Train: 20 [ 250/390]  Loss: 0.4965 (0.390)  Acc@1: 84.3750 (86.3297)  Acc@5: 100.0000 (99.5642)LR: 1.671e-02
Train: 20 [ 300/390]  Loss: 0.3894 (0.396)  Acc@1: 85.9375 (86.1140)  Acc@5: 100.0000 (99.5328)LR: 1.671e-02
Train: 20 [ 350/390]  Loss: 0.3690 (0.394)  Acc@1: 93.7500 (86.2936)  Acc@5: 98.4375 (99.5459)LR: 1.671e-02
Train: 20 [ 390/390]  Loss: 0.1153 (0.393)  Acc@1: 95.0000 (86.3080)  Acc@5: 100.0000 (99.5360)LR: 1.671e-02
train_acc 86.308000
Valid: 20 [   0/390]  Loss: 0.5234 (0.523)  Acc@1: 79.6875 (79.6875)  Acc@5: 100.0000 (100.0000)
Valid: 20 [  50/390]  Loss: 0.6219 (0.486)  Acc@1: 76.5625 (83.4252)  Acc@5: 100.0000 (99.2341)
Valid: 20 [ 100/390]  Loss: 0.2360 (0.477)  Acc@1: 93.7500 (84.0501)  Acc@5: 100.0000 (99.2265)
Valid: 20 [ 150/390]  Loss: 0.5354 (0.482)  Acc@1: 82.8125 (84.0335)  Acc@5: 100.0000 (99.1722)
Valid: 20 [ 200/390]  Loss: 0.4312 (0.488)  Acc@1: 84.3750 (83.8231)  Acc@5: 100.0000 (99.2071)
Valid: 20 [ 250/390]  Loss: 0.6976 (0.491)  Acc@1: 79.6875 (83.7338)  Acc@5: 100.0000 (99.2281)
Valid: 20 [ 300/390]  Loss: 0.4545 (0.490)  Acc@1: 87.5000 (83.7936)  Acc@5: 98.4375 (99.2265)
Valid: 20 [ 350/390]  Loss: 0.4142 (0.488)  Acc@1: 90.6250 (83.8319)  Acc@5: 100.0000 (99.2254)
Valid: 20 [ 390/390]  Loss: 0.5311 (0.486)  Acc@1: 80.0000 (83.8040)  Acc@5: 100.0000 (99.2480)
valid_acc 83.804000
epoch = 20   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 1), ('sep_conv_3x3', 2)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('skip_connect', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1371, 0.0970, 0.0728, 0.1048, 0.2070, 0.1351, 0.1195, 0.1267],
        [0.1690, 0.0842, 0.0691, 0.0971, 0.1407, 0.1518, 0.1392, 0.1489],
        [0.1454, 0.0922, 0.0734, 0.1004, 0.1793, 0.1705, 0.1290, 0.1098],
        [0.2009, 0.0809, 0.0704, 0.0957, 0.1294, 0.1289, 0.1580, 0.1358],
        [0.1786, 0.0682, 0.0618, 0.0923, 0.1416, 0.1603, 0.1469, 0.1502],
        [0.1674, 0.0899, 0.0683, 0.0911, 0.1519, 0.1470, 0.1388, 0.1456],
        [0.1940, 0.0809, 0.0657, 0.0908, 0.1461, 0.1511, 0.1286, 0.1428],
        [0.2323, 0.0630, 0.0540, 0.0844, 0.1435, 0.1492, 0.1415, 0.1320],
        [0.2458, 0.0590, 0.0527, 0.0726, 0.1415, 0.1459, 0.1353, 0.1471],
        [0.2083, 0.0906, 0.0733, 0.0995, 0.1281, 0.1306, 0.1355, 0.1340],
        [0.2223, 0.0749, 0.0650, 0.0861, 0.1563, 0.1270, 0.1385, 0.1299],
        [0.2592, 0.0609, 0.0545, 0.0851, 0.1529, 0.1276, 0.1310, 0.1289],
        [0.2513, 0.0602, 0.0535, 0.0723, 0.1495, 0.1293, 0.1338, 0.1500],
        [0.2685, 0.0571, 0.0523, 0.0650, 0.1355, 0.1341, 0.1522, 0.1354]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1177, 0.1403, 0.1207, 0.1242, 0.1267, 0.1201, 0.1211, 0.1292],
        [0.1234, 0.1229, 0.1050, 0.1359, 0.1307, 0.1342, 0.1300, 0.1180],
        [0.1098, 0.1471, 0.1320, 0.1216, 0.1269, 0.1206, 0.1161, 0.1259],
        [0.1266, 0.1246, 0.1121, 0.1190, 0.1275, 0.1404, 0.1334, 0.1165],
        [0.1290, 0.1031, 0.0868, 0.1277, 0.1321, 0.1268, 0.1396, 0.1548],
        [0.1165, 0.1338, 0.1297, 0.1171, 0.1328, 0.1187, 0.1203, 0.1311],
        [0.1273, 0.1267, 0.1190, 0.1190, 0.1345, 0.1310, 0.1235, 0.1190],
        [0.1288, 0.0987, 0.0879, 0.1302, 0.1400, 0.1504, 0.1307, 0.1332],
        [0.1394, 0.0896, 0.0895, 0.1275, 0.1205, 0.1411, 0.1565, 0.1360],
        [0.1065, 0.1330, 0.1223, 0.1355, 0.1312, 0.1343, 0.1271, 0.1100],
        [0.1190, 0.1215, 0.1120, 0.1332, 0.1444, 0.1263, 0.1189, 0.1246],
        [0.1332, 0.0987, 0.0858, 0.1359, 0.1364, 0.1395, 0.1309, 0.1397],
        [0.1419, 0.0867, 0.0824, 0.1216, 0.1220, 0.1524, 0.1479, 0.1450],
        [0.1647, 0.0881, 0.0793, 0.1146, 0.1313, 0.1564, 0.1196, 0.1460]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 21 [   0/390]  Loss: 0.5331 (0.533)  Acc@1: 81.2500 (81.2500)  Acc@5: 98.4375 (98.4375)LR: 1.598e-02
Train: 21 [  50/390]  Loss: 0.2875 (0.374)  Acc@1: 92.1875 (87.6225)  Acc@5: 100.0000 (99.3566)LR: 1.598e-02
Train: 21 [ 100/390]  Loss: 0.4697 (0.353)  Acc@1: 82.8125 (88.0879)  Acc@5: 100.0000 (99.5514)LR: 1.598e-02
Train: 21 [ 150/390]  Loss: 0.4622 (0.359)  Acc@1: 81.2500 (87.7483)  Acc@5: 100.0000 (99.5344)LR: 1.598e-02
Train: 21 [ 200/390]  Loss: 0.3047 (0.368)  Acc@1: 89.0625 (87.4922)  Acc@5: 100.0000 (99.5336)LR: 1.598e-02
Train: 21 [ 250/390]  Loss: 0.2982 (0.378)  Acc@1: 87.5000 (87.1576)  Acc@5: 100.0000 (99.5580)LR: 1.598e-02
Train: 21 [ 300/390]  Loss: 0.2561 (0.377)  Acc@1: 92.1875 (87.1262)  Acc@5: 100.0000 (99.5795)LR: 1.598e-02
Train: 21 [ 350/390]  Loss: 0.2487 (0.378)  Acc@1: 87.5000 (87.0548)  Acc@5: 100.0000 (99.5949)LR: 1.598e-02
Train: 21 [ 390/390]  Loss: 0.4066 (0.377)  Acc@1: 82.5000 (87.0480)  Acc@5: 100.0000 (99.6080)LR: 1.598e-02
train_acc 87.048000
Valid: 21 [   0/390]  Loss: 0.5790 (0.579)  Acc@1: 79.6875 (79.6875)  Acc@5: 98.4375 (98.4375)
Valid: 21 [  50/390]  Loss: 0.5906 (0.471)  Acc@1: 76.5625 (84.8958)  Acc@5: 100.0000 (99.1115)
Valid: 21 [ 100/390]  Loss: 0.5076 (0.472)  Acc@1: 85.9375 (84.6380)  Acc@5: 98.4375 (99.0563)
Valid: 21 [ 150/390]  Loss: 0.5199 (0.466)  Acc@1: 84.3750 (84.6026)  Acc@5: 100.0000 (99.1515)
Valid: 21 [ 200/390]  Loss: 0.4208 (0.461)  Acc@1: 89.0625 (84.9036)  Acc@5: 96.8750 (99.1371)
Valid: 21 [ 250/390]  Loss: 0.7380 (0.468)  Acc@1: 78.1250 (84.6800)  Acc@5: 100.0000 (99.1721)
Valid: 21 [ 300/390]  Loss: 0.5578 (0.466)  Acc@1: 81.2500 (84.7124)  Acc@5: 100.0000 (99.1902)
Valid: 21 [ 350/390]  Loss: 0.3372 (0.463)  Acc@1: 85.9375 (84.8113)  Acc@5: 98.4375 (99.1943)
Valid: 21 [ 390/390]  Loss: 0.6261 (0.471)  Acc@1: 80.0000 (84.6080)  Acc@5: 100.0000 (99.1600)
valid_acc 84.608000
epoch = 21   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('skip_connect', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1358, 0.0951, 0.0713, 0.1047, 0.2122, 0.1346, 0.1213, 0.1251],
        [0.1734, 0.0817, 0.0669, 0.0956, 0.1417, 0.1525, 0.1393, 0.1488],
        [0.1456, 0.0902, 0.0722, 0.1001, 0.1829, 0.1733, 0.1274, 0.1084],
        [0.2076, 0.0787, 0.0686, 0.0947, 0.1284, 0.1288, 0.1585, 0.1349],
        [0.1817, 0.0659, 0.0602, 0.0912, 0.1410, 0.1614, 0.1485, 0.1502],
        [0.1703, 0.0886, 0.0675, 0.0917, 0.1515, 0.1475, 0.1387, 0.1443],
        [0.2002, 0.0785, 0.0639, 0.0894, 0.1469, 0.1503, 0.1274, 0.1434],
        [0.2415, 0.0607, 0.0525, 0.0838, 0.1408, 0.1487, 0.1405, 0.1314],
        [0.2568, 0.0569, 0.0512, 0.0715, 0.1396, 0.1433, 0.1338, 0.1469],
        [0.2138, 0.0887, 0.0721, 0.0995, 0.1264, 0.1299, 0.1354, 0.1341],
        [0.2302, 0.0726, 0.0632, 0.0846, 0.1547, 0.1250, 0.1394, 0.1303],
        [0.2683, 0.0588, 0.0530, 0.0841, 0.1502, 0.1267, 0.1309, 0.1280],
        [0.2589, 0.0580, 0.0519, 0.0707, 0.1485, 0.1289, 0.1320, 0.1511],
        [0.2821, 0.0542, 0.0500, 0.0627, 0.1338, 0.1329, 0.1509, 0.1334]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1172, 0.1413, 0.1213, 0.1242, 0.1251, 0.1203, 0.1215, 0.1291],
        [0.1234, 0.1221, 0.1045, 0.1371, 0.1308, 0.1336, 0.1300, 0.1183],
        [0.1084, 0.1470, 0.1324, 0.1226, 0.1265, 0.1202, 0.1167, 0.1262],
        [0.1267, 0.1238, 0.1120, 0.1190, 0.1278, 0.1411, 0.1334, 0.1161],
        [0.1294, 0.1007, 0.0855, 0.1275, 0.1339, 0.1255, 0.1409, 0.1566],
        [0.1156, 0.1343, 0.1313, 0.1173, 0.1332, 0.1180, 0.1202, 0.1302],
        [0.1273, 0.1259, 0.1192, 0.1198, 0.1354, 0.1314, 0.1233, 0.1178],
        [0.1304, 0.0971, 0.0876, 0.1316, 0.1413, 0.1485, 0.1290, 0.1345],
        [0.1394, 0.0884, 0.0898, 0.1289, 0.1198, 0.1410, 0.1570, 0.1357],
        [0.1050, 0.1330, 0.1234, 0.1366, 0.1312, 0.1348, 0.1267, 0.1093],
        [0.1192, 0.1205, 0.1118, 0.1342, 0.1448, 0.1268, 0.1183, 0.1244],
        [0.1344, 0.0962, 0.0850, 0.1374, 0.1365, 0.1393, 0.1310, 0.1402],
        [0.1431, 0.0845, 0.0820, 0.1225, 0.1228, 0.1530, 0.1492, 0.1429],
        [0.1663, 0.0862, 0.0784, 0.1144, 0.1321, 0.1572, 0.1184, 0.1470]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 22 [   0/390]  Loss: 0.4048 (0.405)  Acc@1: 87.5000 (87.5000)  Acc@5: 100.0000 (100.0000)LR: 1.525e-02
Train: 22 [  50/390]  Loss: 0.5000 (0.394)  Acc@1: 79.6875 (86.3664)  Acc@5: 100.0000 (99.7549)LR: 1.525e-02
Train: 22 [ 100/390]  Loss: 0.4957 (0.383)  Acc@1: 82.8125 (86.5408)  Acc@5: 100.0000 (99.6287)LR: 1.525e-02
Train: 22 [ 150/390]  Loss: 0.3296 (0.374)  Acc@1: 90.6250 (86.9826)  Acc@5: 100.0000 (99.5861)LR: 1.525e-02
Train: 22 [ 200/390]  Loss: 0.3768 (0.374)  Acc@1: 89.0625 (87.0103)  Acc@5: 98.4375 (99.5647)LR: 1.525e-02
Train: 22 [ 250/390]  Loss: 0.4010 (0.373)  Acc@1: 85.9375 (86.9397)  Acc@5: 100.0000 (99.6016)LR: 1.525e-02
Train: 22 [ 300/390]  Loss: 0.4770 (0.368)  Acc@1: 87.5000 (87.1211)  Acc@5: 98.4375 (99.6003)LR: 1.525e-02
Train: 22 [ 350/390]  Loss: 0.4201 (0.365)  Acc@1: 87.5000 (87.2552)  Acc@5: 98.4375 (99.5949)LR: 1.525e-02
Train: 22 [ 390/390]  Loss: 0.2597 (0.366)  Acc@1: 92.5000 (87.2080)  Acc@5: 100.0000 (99.5880)LR: 1.525e-02
train_acc 87.208000
Valid: 22 [   0/390]  Loss: 0.5669 (0.567)  Acc@1: 81.2500 (81.2500)  Acc@5: 98.4375 (98.4375)
Valid: 22 [  50/390]  Loss: 0.6466 (0.440)  Acc@1: 78.1250 (84.6507)  Acc@5: 98.4375 (99.2647)
Valid: 22 [ 100/390]  Loss: 0.4229 (0.442)  Acc@1: 85.9375 (84.4678)  Acc@5: 100.0000 (99.2110)
Valid: 22 [ 150/390]  Loss: 0.4994 (0.447)  Acc@1: 78.1250 (84.6026)  Acc@5: 100.0000 (99.2343)
Valid: 22 [ 200/390]  Loss: 0.4221 (0.451)  Acc@1: 87.5000 (84.4683)  Acc@5: 100.0000 (99.2304)
Valid: 22 [ 250/390]  Loss: 0.3887 (0.449)  Acc@1: 85.9375 (84.7485)  Acc@5: 100.0000 (99.2032)
Valid: 22 [ 300/390]  Loss: 0.3841 (0.454)  Acc@1: 87.5000 (84.7488)  Acc@5: 98.4375 (99.2058)
Valid: 22 [ 350/390]  Loss: 0.5679 (0.454)  Acc@1: 73.4375 (84.8469)  Acc@5: 100.0000 (99.2299)
Valid: 22 [ 390/390]  Loss: 0.4849 (0.454)  Acc@1: 80.0000 (84.8520)  Acc@5: 100.0000 (99.2400)
valid_acc 84.852000
epoch = 22   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('skip_connect', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1374, 0.0934, 0.0704, 0.1054, 0.2155, 0.1331, 0.1214, 0.1233],
        [0.1750, 0.0803, 0.0657, 0.0950, 0.1432, 0.1520, 0.1402, 0.1485],
        [0.1471, 0.0882, 0.0712, 0.1003, 0.1846, 0.1740, 0.1272, 0.1074],
        [0.2130, 0.0767, 0.0670, 0.0937, 0.1278, 0.1274, 0.1600, 0.1344],
        [0.1845, 0.0636, 0.0589, 0.0904, 0.1418, 0.1621, 0.1477, 0.1511],
        [0.1748, 0.0863, 0.0663, 0.0914, 0.1519, 0.1472, 0.1393, 0.1428],
        [0.2051, 0.0768, 0.0626, 0.0884, 0.1479, 0.1506, 0.1254, 0.1432],
        [0.2491, 0.0584, 0.0508, 0.0823, 0.1390, 0.1486, 0.1413, 0.1306],
        [0.2666, 0.0549, 0.0498, 0.0704, 0.1392, 0.1409, 0.1324, 0.1459],
        [0.2195, 0.0874, 0.0712, 0.0997, 0.1247, 0.1278, 0.1357, 0.1339],
        [0.2366, 0.0710, 0.0623, 0.0842, 0.1544, 0.1230, 0.1394, 0.1291],
        [0.2784, 0.0570, 0.0518, 0.0834, 0.1478, 0.1247, 0.1305, 0.1265],
        [0.2672, 0.0560, 0.0506, 0.0699, 0.1471, 0.1281, 0.1309, 0.1502],
        [0.2949, 0.0526, 0.0491, 0.0620, 0.1320, 0.1301, 0.1484, 0.1310]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1164, 0.1414, 0.1211, 0.1251, 0.1251, 0.1194, 0.1222, 0.1293],
        [0.1241, 0.1208, 0.1030, 0.1387, 0.1323, 0.1337, 0.1306, 0.1168],
        [0.1082, 0.1471, 0.1329, 0.1226, 0.1266, 0.1196, 0.1160, 0.1270],
        [0.1269, 0.1228, 0.1109, 0.1188, 0.1286, 0.1428, 0.1335, 0.1156],
        [0.1294, 0.0984, 0.0842, 0.1269, 0.1353, 0.1261, 0.1411, 0.1586],
        [0.1155, 0.1331, 0.1303, 0.1184, 0.1342, 0.1177, 0.1207, 0.1301],
        [0.1266, 0.1252, 0.1183, 0.1198, 0.1366, 0.1328, 0.1235, 0.1173],
        [0.1317, 0.0953, 0.0867, 0.1320, 0.1423, 0.1487, 0.1289, 0.1344],
        [0.1404, 0.0863, 0.0890, 0.1289, 0.1200, 0.1415, 0.1583, 0.1356],
        [0.1051, 0.1327, 0.1234, 0.1367, 0.1304, 0.1345, 0.1274, 0.1098],
        [0.1177, 0.1193, 0.1107, 0.1351, 0.1458, 0.1285, 0.1186, 0.1243],
        [0.1338, 0.0944, 0.0839, 0.1373, 0.1370, 0.1405, 0.1326, 0.1404],
        [0.1442, 0.0831, 0.0813, 0.1232, 0.1236, 0.1532, 0.1486, 0.1428],
        [0.1661, 0.0848, 0.0775, 0.1142, 0.1340, 0.1581, 0.1181, 0.1473]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 23 [   0/390]  Loss: 0.3762 (0.376)  Acc@1: 84.3750 (84.3750)  Acc@5: 100.0000 (100.0000)LR: 1.450e-02
Train: 23 [  50/390]  Loss: 0.3268 (0.341)  Acc@1: 90.6250 (87.8983)  Acc@5: 100.0000 (99.6324)LR: 1.450e-02
Train: 23 [ 100/390]  Loss: 0.2360 (0.339)  Acc@1: 90.6250 (87.9332)  Acc@5: 100.0000 (99.6287)LR: 1.450e-02
Train: 23 [ 150/390]  Loss: 0.3695 (0.334)  Acc@1: 89.0625 (88.1623)  Acc@5: 100.0000 (99.6792)LR: 1.450e-02
Train: 23 [ 200/390]  Loss: 0.4158 (0.340)  Acc@1: 85.9375 (87.8032)  Acc@5: 100.0000 (99.6191)LR: 1.450e-02
Train: 23 [ 250/390]  Loss: 0.4254 (0.348)  Acc@1: 89.0625 (87.6494)  Acc@5: 98.4375 (99.6327)LR: 1.450e-02
Train: 23 [ 300/390]  Loss: 0.4139 (0.355)  Acc@1: 85.9375 (87.4585)  Acc@5: 100.0000 (99.6262)LR: 1.450e-02
Train: 23 [ 350/390]  Loss: 0.2712 (0.359)  Acc@1: 90.6250 (87.3531)  Acc@5: 100.0000 (99.6038)LR: 1.450e-02
Train: 23 [ 390/390]  Loss: 0.7080 (0.359)  Acc@1: 75.0000 (87.4000)  Acc@5: 100.0000 (99.6080)LR: 1.450e-02
train_acc 87.400000
Valid: 23 [   0/390]  Loss: 0.3040 (0.304)  Acc@1: 90.6250 (90.6250)  Acc@5: 98.4375 (98.4375)
Valid: 23 [  50/390]  Loss: 0.4323 (0.428)  Acc@1: 84.3750 (85.9681)  Acc@5: 100.0000 (99.3260)
Valid: 23 [ 100/390]  Loss: 0.5578 (0.427)  Acc@1: 82.8125 (85.6900)  Acc@5: 96.8750 (99.3657)
Valid: 23 [ 150/390]  Loss: 0.1363 (0.420)  Acc@1: 95.3125 (86.0203)  Acc@5: 100.0000 (99.3377)
Valid: 23 [ 200/390]  Loss: 0.3533 (0.433)  Acc@1: 85.9375 (85.7665)  Acc@5: 100.0000 (99.2771)
Valid: 23 [ 250/390]  Loss: 0.5970 (0.431)  Acc@1: 79.6875 (85.8192)  Acc@5: 98.4375 (99.2966)
Valid: 23 [ 300/390]  Loss: 0.5117 (0.433)  Acc@1: 81.2500 (85.7402)  Acc@5: 100.0000 (99.3096)
Valid: 23 [ 350/390]  Loss: 0.3694 (0.433)  Acc@1: 89.0625 (85.5947)  Acc@5: 98.4375 (99.3145)
Valid: 23 [ 390/390]  Loss: 0.4707 (0.430)  Acc@1: 82.5000 (85.7520)  Acc@5: 100.0000 (99.3120)
valid_acc 85.752000
epoch = 23   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('skip_connect', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1389, 0.0916, 0.0696, 0.1054, 0.2165, 0.1327, 0.1220, 0.1232],
        [0.1774, 0.0785, 0.0645, 0.0944, 0.1449, 0.1514, 0.1409, 0.1479],
        [0.1484, 0.0865, 0.0706, 0.1003, 0.1864, 0.1762, 0.1258, 0.1058],
        [0.2196, 0.0752, 0.0661, 0.0937, 0.1269, 0.1247, 0.1591, 0.1347],
        [0.1878, 0.0617, 0.0577, 0.0898, 0.1417, 0.1617, 0.1490, 0.1505],
        [0.1786, 0.0849, 0.0656, 0.0914, 0.1517, 0.1458, 0.1396, 0.1423],
        [0.2104, 0.0751, 0.0614, 0.0877, 0.1474, 0.1524, 0.1243, 0.1412],
        [0.2565, 0.0563, 0.0494, 0.0807, 0.1387, 0.1476, 0.1402, 0.1306],
        [0.2780, 0.0525, 0.0481, 0.0686, 0.1383, 0.1390, 0.1308, 0.1448],
        [0.2244, 0.0860, 0.0706, 0.0995, 0.1231, 0.1275, 0.1360, 0.1328],
        [0.2453, 0.0693, 0.0613, 0.0837, 0.1534, 0.1207, 0.1385, 0.1279],
        [0.2903, 0.0548, 0.0504, 0.0823, 0.1452, 0.1220, 0.1301, 0.1250],
        [0.2788, 0.0535, 0.0490, 0.0685, 0.1454, 0.1267, 0.1288, 0.1493],
        [0.3088, 0.0504, 0.0476, 0.0605, 0.1296, 0.1276, 0.1471, 0.1284]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1150, 0.1413, 0.1205, 0.1263, 0.1264, 0.1186, 0.1236, 0.1283],
        [0.1248, 0.1204, 0.1022, 0.1394, 0.1318, 0.1336, 0.1318, 0.1159],
        [0.1076, 0.1481, 0.1336, 0.1240, 0.1264, 0.1172, 0.1155, 0.1277],
        [0.1273, 0.1225, 0.1104, 0.1193, 0.1297, 0.1446, 0.1325, 0.1136],
        [0.1304, 0.0966, 0.0837, 0.1281, 0.1354, 0.1245, 0.1403, 0.1610],
        [0.1144, 0.1327, 0.1302, 0.1197, 0.1343, 0.1174, 0.1203, 0.1310],
        [0.1263, 0.1250, 0.1178, 0.1200, 0.1372, 0.1328, 0.1242, 0.1169],
        [0.1324, 0.0934, 0.0864, 0.1331, 0.1406, 0.1500, 0.1296, 0.1346],
        [0.1412, 0.0845, 0.0879, 0.1282, 0.1199, 0.1422, 0.1602, 0.1359],
        [0.1036, 0.1329, 0.1230, 0.1377, 0.1306, 0.1347, 0.1286, 0.1090],
        [0.1166, 0.1196, 0.1108, 0.1354, 0.1474, 0.1275, 0.1190, 0.1238],
        [0.1349, 0.0927, 0.0835, 0.1392, 0.1376, 0.1393, 0.1337, 0.1391],
        [0.1456, 0.0817, 0.0808, 0.1241, 0.1242, 0.1535, 0.1478, 0.1422],
        [0.1678, 0.0835, 0.0774, 0.1152, 0.1347, 0.1566, 0.1180, 0.1469]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 24 [   0/390]  Loss: 0.2885 (0.289)  Acc@1: 90.6250 (90.6250)  Acc@5: 100.0000 (100.0000)LR: 1.375e-02
Train: 24 [  50/390]  Loss: 0.3214 (0.355)  Acc@1: 87.5000 (87.4694)  Acc@5: 100.0000 (99.6017)LR: 1.375e-02
Train: 24 [ 100/390]  Loss: 0.4962 (0.366)  Acc@1: 87.5000 (87.0668)  Acc@5: 100.0000 (99.5978)LR: 1.375e-02
Train: 24 [ 150/390]  Loss: 0.4263 (0.362)  Acc@1: 89.0625 (87.2206)  Acc@5: 100.0000 (99.5964)LR: 1.375e-02
Train: 24 [ 200/390]  Loss: 0.5317 (0.362)  Acc@1: 82.8125 (87.2357)  Acc@5: 100.0000 (99.5569)LR: 1.375e-02
Train: 24 [ 250/390]  Loss: 0.3914 (0.362)  Acc@1: 85.9375 (87.2572)  Acc@5: 100.0000 (99.5954)LR: 1.375e-02
Train: 24 [ 300/390]  Loss: 0.3663 (0.360)  Acc@1: 90.6250 (87.2041)  Acc@5: 100.0000 (99.6003)LR: 1.375e-02
Train: 24 [ 350/390]  Loss: 0.3732 (0.358)  Acc@1: 81.2500 (87.3843)  Acc@5: 100.0000 (99.5905)LR: 1.375e-02
Train: 24 [ 390/390]  Loss: 0.3360 (0.356)  Acc@1: 82.5000 (87.4920)  Acc@5: 100.0000 (99.6040)LR: 1.375e-02
train_acc 87.492000
Valid: 24 [   0/390]  Loss: 0.1179 (0.118)  Acc@1: 95.3125 (95.3125)  Acc@5: 100.0000 (100.0000)
Valid: 24 [  50/390]  Loss: 0.4831 (0.414)  Acc@1: 85.9375 (86.6422)  Acc@5: 98.4375 (99.4792)
Valid: 24 [ 100/390]  Loss: 0.2598 (0.434)  Acc@1: 92.1875 (85.6745)  Acc@5: 100.0000 (99.3348)
Valid: 24 [ 150/390]  Loss: 0.4343 (0.440)  Acc@1: 89.0625 (85.6064)  Acc@5: 100.0000 (99.3584)
Valid: 24 [ 200/390]  Loss: 0.3803 (0.441)  Acc@1: 85.9375 (85.5410)  Acc@5: 100.0000 (99.3470)
Valid: 24 [ 250/390]  Loss: 0.5078 (0.443)  Acc@1: 78.1250 (85.5578)  Acc@5: 98.4375 (99.3401)
Valid: 24 [ 300/390]  Loss: 0.3471 (0.438)  Acc@1: 85.9375 (85.7195)  Acc@5: 100.0000 (99.3615)
Valid: 24 [ 350/390]  Loss: 0.4729 (0.438)  Acc@1: 79.6875 (85.6971)  Acc@5: 100.0000 (99.3501)
Valid: 24 [ 390/390]  Loss: 0.3233 (0.439)  Acc@1: 90.0000 (85.6760)  Acc@5: 100.0000 (99.3560)
valid_acc 85.676000
epoch = 24   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('skip_connect', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1396, 0.0899, 0.0690, 0.1065, 0.2178, 0.1312, 0.1229, 0.1230],
        [0.1808, 0.0759, 0.0627, 0.0929, 0.1467, 0.1505, 0.1424, 0.1480],
        [0.1499, 0.0848, 0.0698, 0.1005, 0.1880, 0.1770, 0.1251, 0.1048],
        [0.2264, 0.0727, 0.0641, 0.0922, 0.1271, 0.1245, 0.1578, 0.1352],
        [0.1910, 0.0594, 0.0557, 0.0882, 0.1427, 0.1623, 0.1494, 0.1512],
        [0.1813, 0.0836, 0.0653, 0.0922, 0.1507, 0.1453, 0.1393, 0.1423],
        [0.2165, 0.0725, 0.0597, 0.0863, 0.1492, 0.1510, 0.1245, 0.1402],
        [0.2644, 0.0542, 0.0479, 0.0794, 0.1371, 0.1477, 0.1386, 0.1306],
        [0.2873, 0.0506, 0.0468, 0.0674, 0.1361, 0.1369, 0.1305, 0.1445],
        [0.2297, 0.0840, 0.0699, 0.0999, 0.1220, 0.1266, 0.1349, 0.1329],
        [0.2549, 0.0666, 0.0593, 0.0821, 0.1525, 0.1195, 0.1386, 0.1264],
        [0.3017, 0.0523, 0.0484, 0.0805, 0.1426, 0.1201, 0.1304, 0.1239],
        [0.2901, 0.0511, 0.0474, 0.0668, 0.1444, 0.1239, 0.1276, 0.1488],
        [0.3232, 0.0479, 0.0458, 0.0587, 0.1267, 0.1253, 0.1454, 0.1270]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1143, 0.1409, 0.1201, 0.1256, 0.1274, 0.1181, 0.1252, 0.1284],
        [0.1253, 0.1197, 0.1014, 0.1390, 0.1324, 0.1335, 0.1334, 0.1154],
        [0.1070, 0.1485, 0.1339, 0.1233, 0.1269, 0.1168, 0.1153, 0.1285],
        [0.1285, 0.1225, 0.1107, 0.1185, 0.1289, 0.1453, 0.1327, 0.1129],
        [0.1305, 0.0952, 0.0829, 0.1286, 0.1357, 0.1235, 0.1421, 0.1615],
        [0.1142, 0.1319, 0.1301, 0.1209, 0.1346, 0.1173, 0.1198, 0.1313],
        [0.1254, 0.1245, 0.1174, 0.1203, 0.1382, 0.1334, 0.1238, 0.1170],
        [0.1326, 0.0928, 0.0863, 0.1341, 0.1401, 0.1503, 0.1287, 0.1351],
        [0.1415, 0.0830, 0.0875, 0.1288, 0.1195, 0.1423, 0.1623, 0.1351],
        [0.1031, 0.1328, 0.1229, 0.1376, 0.1308, 0.1357, 0.1290, 0.1082],
        [0.1161, 0.1196, 0.1105, 0.1356, 0.1470, 0.1268, 0.1193, 0.1252],
        [0.1352, 0.0919, 0.0829, 0.1402, 0.1378, 0.1381, 0.1341, 0.1399],
        [0.1471, 0.0804, 0.0803, 0.1250, 0.1232, 0.1538, 0.1487, 0.1415],
        [0.1678, 0.0821, 0.0765, 0.1152, 0.1353, 0.1574, 0.1180, 0.1475]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 25 [   0/390]  Loss: 0.2798 (0.280)  Acc@1: 90.6250 (90.6250)  Acc@5: 100.0000 (100.0000)LR: 1.300e-02
Train: 25 [  50/390]  Loss: 0.2682 (0.323)  Acc@1: 84.3750 (88.3272)  Acc@5: 100.0000 (99.8468)LR: 1.300e-02
Train: 25 [ 100/390]  Loss: 0.4374 (0.333)  Acc@1: 87.5000 (88.0879)  Acc@5: 100.0000 (99.8298)LR: 1.300e-02
Train: 25 [ 150/390]  Loss: 0.4461 (0.336)  Acc@1: 87.5000 (87.9450)  Acc@5: 96.8750 (99.7413)LR: 1.300e-02
Train: 25 [ 200/390]  Loss: 0.3439 (0.337)  Acc@1: 90.6250 (88.0908)  Acc@5: 98.4375 (99.6813)LR: 1.300e-02
Train: 25 [ 250/390]  Loss: 0.3012 (0.338)  Acc@1: 90.6250 (88.1287)  Acc@5: 100.0000 (99.6950)LR: 1.300e-02
Train: 25 [ 300/390]  Loss: 0.2355 (0.334)  Acc@1: 92.1875 (88.2631)  Acc@5: 100.0000 (99.7041)LR: 1.300e-02
Train: 25 [ 350/390]  Loss: 0.3248 (0.335)  Acc@1: 89.0625 (88.1633)  Acc@5: 100.0000 (99.6973)LR: 1.300e-02
Train: 25 [ 390/390]  Loss: 0.5231 (0.337)  Acc@1: 90.0000 (88.0880)  Acc@5: 100.0000 (99.7080)LR: 1.300e-02
train_acc 88.088000
Valid: 25 [   0/390]  Loss: 0.5368 (0.537)  Acc@1: 82.8125 (82.8125)  Acc@5: 100.0000 (100.0000)
Valid: 25 [  50/390]  Loss: 0.4109 (0.470)  Acc@1: 89.0625 (84.8652)  Acc@5: 96.8750 (99.0502)
Valid: 25 [ 100/390]  Loss: 0.5518 (0.449)  Acc@1: 79.6875 (84.9629)  Acc@5: 98.4375 (99.2884)
Valid: 25 [ 150/390]  Loss: 0.3025 (0.451)  Acc@1: 90.6250 (84.8820)  Acc@5: 100.0000 (99.2757)
Valid: 25 [ 200/390]  Loss: 0.5015 (0.446)  Acc@1: 87.5000 (85.0824)  Acc@5: 100.0000 (99.2460)
Valid: 25 [ 250/390]  Loss: 0.4777 (0.455)  Acc@1: 84.3750 (84.8730)  Acc@5: 100.0000 (99.2156)
Valid: 25 [ 300/390]  Loss: 0.7069 (0.454)  Acc@1: 71.8750 (84.9460)  Acc@5: 98.4375 (99.1954)
Valid: 25 [ 350/390]  Loss: 0.4038 (0.451)  Acc@1: 90.6250 (85.0561)  Acc@5: 98.4375 (99.2032)
Valid: 25 [ 390/390]  Loss: 0.4836 (0.448)  Acc@1: 82.5000 (85.1480)  Acc@5: 100.0000 (99.2200)
valid_acc 85.148000
epoch = 25   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('skip_connect', 1), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1405, 0.0875, 0.0679, 0.1061, 0.2213, 0.1311, 0.1236, 0.1220],
        [0.1845, 0.0735, 0.0609, 0.0916, 0.1473, 0.1516, 0.1426, 0.1480],
        [0.1517, 0.0829, 0.0688, 0.1004, 0.1904, 0.1786, 0.1241, 0.1031],
        [0.2337, 0.0704, 0.0623, 0.0905, 0.1263, 0.1238, 0.1586, 0.1344],
        [0.1953, 0.0576, 0.0542, 0.0873, 0.1427, 0.1641, 0.1491, 0.1496],
        [0.1859, 0.0822, 0.0651, 0.0932, 0.1505, 0.1424, 0.1384, 0.1422],
        [0.2248, 0.0703, 0.0583, 0.0851, 0.1484, 0.1505, 0.1244, 0.1382],
        [0.2725, 0.0522, 0.0465, 0.0782, 0.1337, 0.1479, 0.1381, 0.1309],
        [0.2962, 0.0484, 0.0451, 0.0656, 0.1350, 0.1363, 0.1294, 0.1441],
        [0.2366, 0.0823, 0.0694, 0.1007, 0.1214, 0.1242, 0.1334, 0.1320],
        [0.2644, 0.0644, 0.0574, 0.0803, 0.1505, 0.1179, 0.1388, 0.1264],
        [0.3135, 0.0505, 0.0469, 0.0795, 0.1396, 0.1192, 0.1285, 0.1224],
        [0.2999, 0.0489, 0.0456, 0.0649, 0.1421, 0.1226, 0.1261, 0.1498],
        [0.3367, 0.0459, 0.0440, 0.0569, 0.1260, 0.1231, 0.1434, 0.1241]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1141, 0.1402, 0.1198, 0.1252, 0.1289, 0.1185, 0.1244, 0.1289],
        [0.1247, 0.1171, 0.0991, 0.1387, 0.1336, 0.1356, 0.1354, 0.1158],
        [0.1070, 0.1476, 0.1336, 0.1223, 0.1270, 0.1170, 0.1159, 0.1295],
        [0.1290, 0.1207, 0.1097, 0.1188, 0.1296, 0.1470, 0.1331, 0.1121],
        [0.1309, 0.0936, 0.0822, 0.1296, 0.1367, 0.1228, 0.1426, 0.1616],
        [0.1140, 0.1301, 0.1303, 0.1227, 0.1353, 0.1170, 0.1187, 0.1318],
        [0.1259, 0.1228, 0.1166, 0.1205, 0.1384, 0.1348, 0.1247, 0.1162],
        [0.1318, 0.0910, 0.0857, 0.1345, 0.1406, 0.1523, 0.1284, 0.1356],
        [0.1415, 0.0810, 0.0863, 0.1282, 0.1199, 0.1430, 0.1635, 0.1367],
        [0.1036, 0.1322, 0.1230, 0.1385, 0.1301, 0.1355, 0.1291, 0.1080],
        [0.1159, 0.1177, 0.1094, 0.1364, 0.1489, 0.1264, 0.1199, 0.1255],
        [0.1348, 0.0904, 0.0821, 0.1408, 0.1390, 0.1377, 0.1343, 0.1409],
        [0.1473, 0.0788, 0.0795, 0.1254, 0.1228, 0.1546, 0.1502, 0.1413],
        [0.1669, 0.0805, 0.0758, 0.1149, 0.1351, 0.1585, 0.1191, 0.1492]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 26 [   0/390]  Loss: 0.1313 (0.131)  Acc@1: 93.7500 (93.7500)  Acc@5: 100.0000 (100.0000)LR: 1.225e-02
Train: 26 [  50/390]  Loss: 0.2497 (0.330)  Acc@1: 92.1875 (88.9400)  Acc@5: 100.0000 (99.6017)LR: 1.225e-02
Train: 26 [ 100/390]  Loss: 0.4758 (0.332)  Acc@1: 85.9375 (88.5984)  Acc@5: 100.0000 (99.5978)LR: 1.225e-02
Train: 26 [ 150/390]  Loss: 0.4066 (0.331)  Acc@1: 87.5000 (88.6693)  Acc@5: 98.4375 (99.6482)LR: 1.225e-02
Train: 26 [ 200/390]  Loss: 0.3551 (0.333)  Acc@1: 89.0625 (88.5494)  Acc@5: 98.4375 (99.6580)LR: 1.225e-02
Train: 26 [ 250/390]  Loss: 0.3496 (0.332)  Acc@1: 89.0625 (88.4898)  Acc@5: 98.4375 (99.6763)LR: 1.225e-02
Train: 26 [ 300/390]  Loss: 0.3688 (0.332)  Acc@1: 84.3750 (88.4863)  Acc@5: 100.0000 (99.6626)LR: 1.225e-02
Train: 26 [ 350/390]  Loss: 0.2695 (0.332)  Acc@1: 90.6250 (88.5105)  Acc@5: 100.0000 (99.6483)LR: 1.225e-02
Train: 26 [ 390/390]  Loss: 0.4016 (0.332)  Acc@1: 87.5000 (88.4600)  Acc@5: 100.0000 (99.6520)LR: 1.225e-02
train_acc 88.460000
Valid: 26 [   0/390]  Loss: 0.4071 (0.407)  Acc@1: 82.8125 (82.8125)  Acc@5: 98.4375 (98.4375)
Valid: 26 [  50/390]  Loss: 0.3636 (0.422)  Acc@1: 84.3750 (85.8456)  Acc@5: 98.4375 (99.3873)
Valid: 26 [ 100/390]  Loss: 0.3237 (0.427)  Acc@1: 87.5000 (85.6590)  Acc@5: 100.0000 (99.4431)
Valid: 26 [ 150/390]  Loss: 0.4605 (0.421)  Acc@1: 84.3750 (85.9375)  Acc@5: 98.4375 (99.5033)
Valid: 26 [ 200/390]  Loss: 0.3627 (0.428)  Acc@1: 90.6250 (85.7665)  Acc@5: 100.0000 (99.4481)
Valid: 26 [ 250/390]  Loss: 0.4572 (0.435)  Acc@1: 87.5000 (85.5391)  Acc@5: 98.4375 (99.3650)
Valid: 26 [ 300/390]  Loss: 0.6355 (0.444)  Acc@1: 79.6875 (85.3353)  Acc@5: 98.4375 (99.3355)
Valid: 26 [ 350/390]  Loss: 0.3358 (0.443)  Acc@1: 89.0625 (85.4523)  Acc@5: 98.4375 (99.3456)
Valid: 26 [ 390/390]  Loss: 0.4380 (0.443)  Acc@1: 82.5000 (85.3400)  Acc@5: 100.0000 (99.3320)
valid_acc 85.340000
epoch = 26   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('skip_connect', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1416, 0.0855, 0.0671, 0.1071, 0.2211, 0.1317, 0.1241, 0.1217],
        [0.1873, 0.0717, 0.0595, 0.0907, 0.1479, 0.1503, 0.1441, 0.1486],
        [0.1524, 0.0813, 0.0679, 0.1006, 0.1909, 0.1809, 0.1233, 0.1026],
        [0.2415, 0.0690, 0.0610, 0.0898, 0.1244, 0.1239, 0.1573, 0.1330],
        [0.2016, 0.0559, 0.0527, 0.0868, 0.1421, 0.1637, 0.1480, 0.1492],
        [0.1902, 0.0807, 0.0645, 0.0938, 0.1506, 0.1410, 0.1378, 0.1416],
        [0.2309, 0.0687, 0.0570, 0.0845, 0.1493, 0.1491, 0.1234, 0.1371],
        [0.2809, 0.0503, 0.0449, 0.0769, 0.1311, 0.1475, 0.1373, 0.1311],
        [0.3078, 0.0466, 0.0435, 0.0638, 0.1344, 0.1329, 0.1280, 0.1431],
        [0.2427, 0.0806, 0.0687, 0.1013, 0.1203, 0.1233, 0.1327, 0.1303],
        [0.2712, 0.0631, 0.0566, 0.0798, 0.1493, 0.1174, 0.1380, 0.1246],
        [0.3271, 0.0488, 0.0454, 0.0784, 0.1373, 0.1171, 0.1266, 0.1193],
        [0.3140, 0.0471, 0.0441, 0.0635, 0.1396, 0.1198, 0.1240, 0.1480],
        [0.3511, 0.0442, 0.0428, 0.0558, 0.1238, 0.1204, 0.1400, 0.1220]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1132, 0.1405, 0.1204, 0.1248, 0.1292, 0.1178, 0.1248, 0.1293],
        [0.1254, 0.1160, 0.0987, 0.1372, 0.1330, 0.1372, 0.1360, 0.1164],
        [0.1060, 0.1477, 0.1339, 0.1215, 0.1269, 0.1169, 0.1161, 0.1310],
        [0.1290, 0.1190, 0.1085, 0.1181, 0.1303, 0.1494, 0.1339, 0.1117],
        [0.1321, 0.0917, 0.0810, 0.1291, 0.1374, 0.1236, 0.1433, 0.1618],
        [0.1124, 0.1298, 0.1307, 0.1243, 0.1352, 0.1168, 0.1200, 0.1309],
        [0.1260, 0.1215, 0.1157, 0.1216, 0.1381, 0.1352, 0.1254, 0.1164],
        [0.1332, 0.0894, 0.0851, 0.1353, 0.1394, 0.1519, 0.1291, 0.1366],
        [0.1419, 0.0803, 0.0867, 0.1295, 0.1186, 0.1420, 0.1644, 0.1365],
        [0.1026, 0.1331, 0.1235, 0.1380, 0.1302, 0.1360, 0.1293, 0.1073],
        [0.1160, 0.1165, 0.1082, 0.1379, 0.1516, 0.1246, 0.1201, 0.1251],
        [0.1370, 0.0892, 0.0816, 0.1430, 0.1399, 0.1361, 0.1328, 0.1404],
        [0.1474, 0.0783, 0.0794, 0.1260, 0.1230, 0.1549, 0.1511, 0.1399],
        [0.1670, 0.0794, 0.0755, 0.1147, 0.1367, 0.1581, 0.1189, 0.1496]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 27 [   0/390]  Loss: 0.2881 (0.288)  Acc@1: 89.0625 (89.0625)  Acc@5: 98.4375 (98.4375)LR: 1.150e-02
Train: 27 [  50/390]  Loss: 0.4678 (0.301)  Acc@1: 81.2500 (89.8591)  Acc@5: 100.0000 (99.6324)LR: 1.150e-02
Train: 27 [ 100/390]  Loss: 0.4770 (0.319)  Acc@1: 82.8125 (89.1089)  Acc@5: 98.4375 (99.6442)LR: 1.150e-02
Train: 27 [ 150/390]  Loss: 0.2621 (0.322)  Acc@1: 89.0625 (88.7728)  Acc@5: 100.0000 (99.6792)LR: 1.150e-02
Train: 27 [ 200/390]  Loss: 0.2457 (0.327)  Acc@1: 92.1875 (88.5728)  Acc@5: 98.4375 (99.6813)LR: 1.150e-02
Train: 27 [ 250/390]  Loss: 0.4633 (0.325)  Acc@1: 81.2500 (88.5769)  Acc@5: 100.0000 (99.6825)LR: 1.150e-02
Train: 27 [ 300/390]  Loss: 0.2944 (0.327)  Acc@1: 93.7500 (88.6472)  Acc@5: 100.0000 (99.6782)LR: 1.150e-02
Train: 27 [ 350/390]  Loss: 0.4030 (0.329)  Acc@1: 87.5000 (88.6218)  Acc@5: 100.0000 (99.6661)LR: 1.150e-02
Train: 27 [ 390/390]  Loss: 0.2638 (0.330)  Acc@1: 92.5000 (88.6080)  Acc@5: 100.0000 (99.6360)LR: 1.150e-02
train_acc 88.608000
Valid: 27 [   0/390]  Loss: 0.5783 (0.578)  Acc@1: 81.2500 (81.2500)  Acc@5: 100.0000 (100.0000)
Valid: 27 [  50/390]  Loss: 0.1733 (0.397)  Acc@1: 96.8750 (86.5809)  Acc@5: 100.0000 (99.5098)
Valid: 27 [ 100/390]  Loss: 0.2207 (0.389)  Acc@1: 93.7500 (86.8348)  Acc@5: 100.0000 (99.4895)
Valid: 27 [ 150/390]  Loss: 0.3838 (0.388)  Acc@1: 89.0625 (86.9309)  Acc@5: 98.4375 (99.4826)
Valid: 27 [ 200/390]  Loss: 0.4290 (0.394)  Acc@1: 87.5000 (86.8081)  Acc@5: 100.0000 (99.5025)
Valid: 27 [ 250/390]  Loss: 0.3002 (0.398)  Acc@1: 87.5000 (86.7094)  Acc@5: 100.0000 (99.4460)
Valid: 27 [ 300/390]  Loss: 0.4260 (0.398)  Acc@1: 84.3750 (86.6279)  Acc@5: 98.4375 (99.4446)
Valid: 27 [ 350/390]  Loss: 0.5910 (0.399)  Acc@1: 84.3750 (86.5296)  Acc@5: 98.4375 (99.4525)
Valid: 27 [ 390/390]  Loss: 0.4615 (0.400)  Acc@1: 82.5000 (86.5360)  Acc@5: 100.0000 (99.4240)
valid_acc 86.536000
epoch = 27   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('sep_conv_5x5', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_5x5', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1434, 0.0835, 0.0660, 0.1070, 0.2240, 0.1317, 0.1227, 0.1216],
        [0.1894, 0.0702, 0.0586, 0.0902, 0.1500, 0.1493, 0.1438, 0.1485],
        [0.1531, 0.0794, 0.0668, 0.1002, 0.1933, 0.1838, 0.1230, 0.1005],
        [0.2486, 0.0674, 0.0603, 0.0893, 0.1229, 0.1239, 0.1566, 0.1309],
        [0.2055, 0.0546, 0.0520, 0.0864, 0.1423, 0.1633, 0.1475, 0.1484],
        [0.1950, 0.0791, 0.0634, 0.0932, 0.1522, 0.1393, 0.1370, 0.1406],
        [0.2380, 0.0669, 0.0560, 0.0835, 0.1492, 0.1475, 0.1218, 0.1371],
        [0.2880, 0.0485, 0.0436, 0.0753, 0.1294, 0.1477, 0.1368, 0.1309],
        [0.3176, 0.0451, 0.0425, 0.0626, 0.1333, 0.1312, 0.1265, 0.1412],
        [0.2495, 0.0793, 0.0676, 0.1009, 0.1191, 0.1226, 0.1323, 0.1287],
        [0.2782, 0.0613, 0.0555, 0.0788, 0.1482, 0.1167, 0.1386, 0.1227],
        [0.3381, 0.0473, 0.0444, 0.0774, 0.1355, 0.1149, 0.1251, 0.1173],
        [0.3242, 0.0456, 0.0432, 0.0625, 0.1367, 0.1194, 0.1216, 0.1468],
        [0.3646, 0.0426, 0.0414, 0.0542, 0.1230, 0.1170, 0.1357, 0.1215]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1134, 0.1394, 0.1199, 0.1252, 0.1308, 0.1167, 0.1245, 0.1300],
        [0.1250, 0.1151, 0.0979, 0.1375, 0.1337, 0.1376, 0.1375, 0.1159],
        [0.1062, 0.1469, 0.1335, 0.1226, 0.1262, 0.1173, 0.1159, 0.1314],
        [0.1286, 0.1182, 0.1079, 0.1197, 0.1295, 0.1507, 0.1342, 0.1111],
        [0.1312, 0.0902, 0.0803, 0.1285, 0.1382, 0.1237, 0.1443, 0.1636],
        [0.1125, 0.1284, 0.1303, 0.1262, 0.1350, 0.1171, 0.1196, 0.1310],
        [0.1256, 0.1213, 0.1159, 0.1225, 0.1374, 0.1362, 0.1262, 0.1149],
        [0.1343, 0.0885, 0.0851, 0.1360, 0.1392, 0.1503, 0.1298, 0.1369],
        [0.1414, 0.0790, 0.0865, 0.1298, 0.1193, 0.1417, 0.1661, 0.1363],
        [0.1031, 0.1328, 0.1234, 0.1382, 0.1300, 0.1354, 0.1303, 0.1070],
        [0.1150, 0.1163, 0.1082, 0.1399, 0.1518, 0.1248, 0.1200, 0.1239],
        [0.1372, 0.0887, 0.0823, 0.1447, 0.1376, 0.1353, 0.1338, 0.1403],
        [0.1481, 0.0778, 0.0804, 0.1281, 0.1239, 0.1531, 0.1488, 0.1399],
        [0.1662, 0.0789, 0.0760, 0.1153, 0.1371, 0.1563, 0.1190, 0.1513]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 28 [   0/390]  Loss: 0.3675 (0.368)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)LR: 1.075e-02
Train: 28 [  50/390]  Loss: 0.2512 (0.297)  Acc@1: 89.0625 (90.2574)  Acc@5: 100.0000 (99.7855)LR: 1.075e-02
Train: 28 [ 100/390]  Loss: 0.2881 (0.315)  Acc@1: 89.0625 (89.3719)  Acc@5: 100.0000 (99.7679)LR: 1.075e-02
Train: 28 [ 150/390]  Loss: 0.3864 (0.314)  Acc@1: 87.5000 (89.0935)  Acc@5: 100.0000 (99.7206)LR: 1.075e-02
Train: 28 [ 200/390]  Loss: 0.2498 (0.316)  Acc@1: 92.1875 (89.0392)  Acc@5: 100.0000 (99.7357)LR: 1.075e-02
Train: 28 [ 250/390]  Loss: 0.4427 (0.322)  Acc@1: 85.9375 (88.7886)  Acc@5: 98.4375 (99.7261)LR: 1.075e-02
Train: 28 [ 300/390]  Loss: 0.2984 (0.318)  Acc@1: 90.6250 (88.8756)  Acc@5: 98.4375 (99.7249)LR: 1.075e-02
Train: 28 [ 350/390]  Loss: 0.4940 (0.323)  Acc@1: 89.0625 (88.7064)  Acc@5: 98.4375 (99.7106)LR: 1.075e-02
Train: 28 [ 390/390]  Loss: 0.3131 (0.321)  Acc@1: 90.0000 (88.8360)  Acc@5: 100.0000 (99.7240)LR: 1.075e-02
train_acc 88.836000
Valid: 28 [   0/390]  Loss: 0.4137 (0.414)  Acc@1: 81.2500 (81.2500)  Acc@5: 100.0000 (100.0000)
Valid: 28 [  50/390]  Loss: 0.6222 (0.456)  Acc@1: 81.2500 (85.7230)  Acc@5: 98.4375 (99.2953)
Valid: 28 [ 100/390]  Loss: 0.4387 (0.444)  Acc@1: 81.2500 (85.5662)  Acc@5: 100.0000 (99.4121)
Valid: 28 [ 150/390]  Loss: 0.4391 (0.435)  Acc@1: 82.8125 (85.6271)  Acc@5: 100.0000 (99.3895)
Valid: 28 [ 200/390]  Loss: 0.4241 (0.438)  Acc@1: 84.3750 (85.7743)  Acc@5: 100.0000 (99.4014)
Valid: 28 [ 250/390]  Loss: 0.3767 (0.425)  Acc@1: 89.0625 (86.1678)  Acc@5: 100.0000 (99.4086)
Valid: 28 [ 300/390]  Loss: 0.4691 (0.433)  Acc@1: 85.9375 (85.9531)  Acc@5: 98.4375 (99.3667)
Valid: 28 [ 350/390]  Loss: 0.2662 (0.430)  Acc@1: 89.0625 (86.0488)  Acc@5: 100.0000 (99.3679)
Valid: 28 [ 390/390]  Loss: 0.5549 (0.430)  Acc@1: 82.5000 (86.0120)  Acc@5: 97.5000 (99.3840)
valid_acc 86.012000
epoch = 28   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1434, 0.0817, 0.0655, 0.1077, 0.2263, 0.1321, 0.1225, 0.1208],
        [0.1937, 0.0684, 0.0577, 0.0896, 0.1518, 0.1468, 0.1440, 0.1480],
        [0.1540, 0.0778, 0.0665, 0.1005, 0.1946, 0.1842, 0.1226, 0.0999],
        [0.2541, 0.0658, 0.0596, 0.0887, 0.1222, 0.1236, 0.1559, 0.1302],
        [0.2112, 0.0528, 0.0506, 0.0851, 0.1420, 0.1638, 0.1469, 0.1476],
        [0.2003, 0.0778, 0.0631, 0.0944, 0.1522, 0.1376, 0.1347, 0.1398],
        [0.2463, 0.0649, 0.0551, 0.0827, 0.1500, 0.1454, 0.1207, 0.1350],
        [0.2995, 0.0466, 0.0423, 0.0741, 0.1269, 0.1456, 0.1351, 0.1301],
        [0.3307, 0.0431, 0.0411, 0.0609, 0.1322, 0.1294, 0.1236, 0.1390],
        [0.2548, 0.0776, 0.0671, 0.1011, 0.1182, 0.1210, 0.1317, 0.1285],
        [0.2863, 0.0600, 0.0548, 0.0783, 0.1455, 0.1160, 0.1379, 0.1211],
        [0.3532, 0.0458, 0.0433, 0.0765, 0.1320, 0.1111, 0.1229, 0.1153],
        [0.3334, 0.0440, 0.0422, 0.0612, 0.1340, 0.1179, 0.1200, 0.1473],
        [0.3817, 0.0409, 0.0401, 0.0528, 0.1198, 0.1150, 0.1309, 0.1187]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1129, 0.1388, 0.1195, 0.1259, 0.1319, 0.1165, 0.1248, 0.1297],
        [0.1254, 0.1152, 0.0982, 0.1368, 0.1331, 0.1373, 0.1385, 0.1156],
        [0.1060, 0.1460, 0.1329, 0.1228, 0.1264, 0.1166, 0.1167, 0.1326],
        [0.1272, 0.1183, 0.1083, 0.1205, 0.1293, 0.1508, 0.1351, 0.1106],
        [0.1328, 0.0892, 0.0798, 0.1298, 0.1385, 0.1221, 0.1441, 0.1638],
        [0.1122, 0.1276, 0.1300, 0.1264, 0.1355, 0.1183, 0.1194, 0.1306],
        [0.1242, 0.1210, 0.1162, 0.1235, 0.1381, 0.1363, 0.1273, 0.1134],
        [0.1359, 0.0871, 0.0843, 0.1365, 0.1388, 0.1498, 0.1300, 0.1376],
        [0.1429, 0.0780, 0.0863, 0.1306, 0.1180, 0.1397, 0.1680, 0.1365],
        [0.1027, 0.1324, 0.1231, 0.1397, 0.1301, 0.1352, 0.1307, 0.1061],
        [0.1135, 0.1162, 0.1087, 0.1407, 0.1526, 0.1239, 0.1195, 0.1249],
        [0.1373, 0.0882, 0.0825, 0.1464, 0.1383, 0.1355, 0.1332, 0.1387],
        [0.1484, 0.0767, 0.0804, 0.1290, 0.1240, 0.1522, 0.1503, 0.1390],
        [0.1671, 0.0781, 0.0758, 0.1153, 0.1381, 0.1549, 0.1180, 0.1527]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 29 [   0/390]  Loss: 0.2152 (0.215)  Acc@1: 93.7500 (93.7500)  Acc@5: 100.0000 (100.0000)LR: 1.002e-02
Train: 29 [  50/390]  Loss: 0.2459 (0.298)  Acc@1: 89.0625 (89.8284)  Acc@5: 100.0000 (99.7549)LR: 1.002e-02
Train: 29 [ 100/390]  Loss: 0.2504 (0.298)  Acc@1: 92.1875 (89.9443)  Acc@5: 100.0000 (99.7525)LR: 1.002e-02
Train: 29 [ 150/390]  Loss: 0.3281 (0.302)  Acc@1: 87.5000 (89.7351)  Acc@5: 100.0000 (99.7206)LR: 1.002e-02
Train: 29 [ 200/390]  Loss: 0.4055 (0.307)  Acc@1: 87.5000 (89.4823)  Acc@5: 98.4375 (99.7279)LR: 1.002e-02
Train: 29 [ 250/390]  Loss: 0.2229 (0.310)  Acc@1: 92.1875 (89.3302)  Acc@5: 100.0000 (99.7385)LR: 1.002e-02
Train: 29 [ 300/390]  Loss: 0.4700 (0.310)  Acc@1: 79.6875 (89.2961)  Acc@5: 100.0000 (99.7353)LR: 1.002e-02
Train: 29 [ 350/390]  Loss: 0.2583 (0.313)  Acc@1: 92.1875 (89.1916)  Acc@5: 100.0000 (99.7151)LR: 1.002e-02
Train: 29 [ 390/390]  Loss: 0.1915 (0.311)  Acc@1: 92.5000 (89.2400)  Acc@5: 100.0000 (99.7200)LR: 1.002e-02
train_acc 89.240000
Valid: 29 [   0/390]  Loss: 0.3654 (0.365)  Acc@1: 87.5000 (87.5000)  Acc@5: 100.0000 (100.0000)
Valid: 29 [  50/390]  Loss: 0.3980 (0.393)  Acc@1: 82.8125 (86.8873)  Acc@5: 100.0000 (99.3566)
Valid: 29 [ 100/390]  Loss: 0.3158 (0.400)  Acc@1: 90.6250 (86.7420)  Acc@5: 100.0000 (99.4740)
Valid: 29 [ 150/390]  Loss: 0.6266 (0.401)  Acc@1: 79.6875 (86.6204)  Acc@5: 98.4375 (99.4205)
Valid: 29 [ 200/390]  Loss: 0.4670 (0.405)  Acc@1: 79.6875 (86.5050)  Acc@5: 100.0000 (99.3781)
Valid: 29 [ 250/390]  Loss: 0.5549 (0.407)  Acc@1: 84.3750 (86.3297)  Acc@5: 96.8750 (99.3464)
Valid: 29 [ 300/390]  Loss: 0.4484 (0.404)  Acc@1: 81.2500 (86.4047)  Acc@5: 98.4375 (99.3615)
Valid: 29 [ 350/390]  Loss: 0.4594 (0.403)  Acc@1: 85.9375 (86.5028)  Acc@5: 98.4375 (99.3456)
Valid: 29 [ 390/390]  Loss: 0.4926 (0.400)  Acc@1: 85.0000 (86.6200)  Acc@5: 100.0000 (99.3600)
valid_acc 86.620000
epoch = 29   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('dil_conv_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1447, 0.0799, 0.0640, 0.1073, 0.2307, 0.1318, 0.1213, 0.1203],
        [0.1964, 0.0667, 0.0561, 0.0883, 0.1527, 0.1472, 0.1444, 0.1483],
        [0.1549, 0.0766, 0.0654, 0.1001, 0.1966, 0.1844, 0.1221, 0.0999],
        [0.2622, 0.0644, 0.0584, 0.0877, 0.1215, 0.1215, 0.1561, 0.1283],
        [0.2163, 0.0515, 0.0496, 0.0845, 0.1420, 0.1640, 0.1442, 0.1480],
        [0.2076, 0.0767, 0.0620, 0.0942, 0.1517, 0.1359, 0.1340, 0.1378],
        [0.2533, 0.0636, 0.0539, 0.0815, 0.1488, 0.1446, 0.1199, 0.1343],
        [0.3122, 0.0451, 0.0411, 0.0732, 0.1239, 0.1438, 0.1335, 0.1273],
        [0.3409, 0.0416, 0.0397, 0.0593, 0.1324, 0.1273, 0.1219, 0.1371],
        [0.2643, 0.0763, 0.0659, 0.1006, 0.1171, 0.1180, 0.1315, 0.1264],
        [0.2939, 0.0586, 0.0536, 0.0773, 0.1453, 0.1143, 0.1377, 0.1192],
        [0.3682, 0.0443, 0.0419, 0.0752, 0.1303, 0.1075, 0.1197, 0.1129],
        [0.3429, 0.0425, 0.0408, 0.0597, 0.1307, 0.1168, 0.1192, 0.1475],
        [0.3955, 0.0393, 0.0387, 0.0511, 0.1175, 0.1141, 0.1273, 0.1165]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1124, 0.1392, 0.1191, 0.1262, 0.1340, 0.1165, 0.1241, 0.1286],
        [0.1260, 0.1143, 0.0977, 0.1369, 0.1332, 0.1379, 0.1392, 0.1148],
        [0.1049, 0.1457, 0.1323, 0.1229, 0.1266, 0.1170, 0.1177, 0.1330],
        [0.1270, 0.1183, 0.1085, 0.1210, 0.1294, 0.1505, 0.1356, 0.1096],
        [0.1342, 0.0881, 0.0800, 0.1317, 0.1372, 0.1205, 0.1448, 0.1633],
        [0.1122, 0.1280, 0.1294, 0.1265, 0.1359, 0.1190, 0.1184, 0.1306],
        [0.1239, 0.1211, 0.1168, 0.1244, 0.1365, 0.1369, 0.1271, 0.1133],
        [0.1360, 0.0859, 0.0841, 0.1369, 0.1398, 0.1505, 0.1309, 0.1359],
        [0.1423, 0.0772, 0.0856, 0.1309, 0.1173, 0.1389, 0.1705, 0.1374],
        [0.1020, 0.1324, 0.1225, 0.1413, 0.1295, 0.1351, 0.1320, 0.1052],
        [0.1133, 0.1158, 0.1085, 0.1408, 0.1540, 0.1227, 0.1202, 0.1248],
        [0.1375, 0.0873, 0.0825, 0.1476, 0.1385, 0.1349, 0.1321, 0.1396],
        [0.1490, 0.0761, 0.0800, 0.1303, 0.1240, 0.1515, 0.1515, 0.1376],
        [0.1679, 0.0773, 0.0754, 0.1155, 0.1385, 0.1548, 0.1177, 0.1528]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 30 [   0/390]  Loss: 0.1482 (0.148)  Acc@1: 93.7500 (93.7500)  Acc@5: 100.0000 (100.0000)LR: 9.292e-03
Train: 30 [  50/390]  Loss: 0.2885 (0.292)  Acc@1: 89.0625 (89.5833)  Acc@5: 100.0000 (99.6630)LR: 9.292e-03
Train: 30 [ 100/390]  Loss: 0.2545 (0.279)  Acc@1: 92.1875 (90.0526)  Acc@5: 100.0000 (99.8144)LR: 9.292e-03
Train: 30 [ 150/390]  Loss: 0.1222 (0.283)  Acc@1: 98.4375 (90.1490)  Acc@5: 100.0000 (99.7827)LR: 9.292e-03
Train: 30 [ 200/390]  Loss: 0.3918 (0.284)  Acc@1: 85.9375 (90.1430)  Acc@5: 100.0000 (99.7590)LR: 9.292e-03
Train: 30 [ 250/390]  Loss: 0.2040 (0.286)  Acc@1: 95.3125 (89.9963)  Acc@5: 100.0000 (99.7759)LR: 9.292e-03
Train: 30 [ 300/390]  Loss: 0.2950 (0.285)  Acc@1: 89.0625 (90.0644)  Acc@5: 100.0000 (99.7820)LR: 9.292e-03
Train: 30 [ 350/390]  Loss: 0.4429 (0.290)  Acc@1: 90.6250 (89.8949)  Acc@5: 100.0000 (99.7863)LR: 9.292e-03
Train: 30 [ 390/390]  Loss: 0.3263 (0.291)  Acc@1: 90.0000 (89.9720)  Acc@5: 100.0000 (99.7800)LR: 9.292e-03
train_acc 89.972000
Valid: 30 [   0/390]  Loss: 0.4316 (0.432)  Acc@1: 87.5000 (87.5000)  Acc@5: 100.0000 (100.0000)
Valid: 30 [  50/390]  Loss: 0.4818 (0.387)  Acc@1: 89.0625 (87.6225)  Acc@5: 98.4375 (99.2953)
Valid: 30 [ 100/390]  Loss: 0.6244 (0.399)  Acc@1: 84.3750 (86.9431)  Acc@5: 98.4375 (99.3967)
Valid: 30 [ 150/390]  Loss: 0.4272 (0.402)  Acc@1: 81.2500 (86.7446)  Acc@5: 100.0000 (99.3688)
Valid: 30 [ 200/390]  Loss: 0.4654 (0.414)  Acc@1: 82.8125 (86.4428)  Acc@5: 98.4375 (99.3081)
Valid: 30 [ 250/390]  Loss: 0.4111 (0.416)  Acc@1: 82.8125 (86.4168)  Acc@5: 98.4375 (99.3152)
Valid: 30 [ 300/390]  Loss: 0.1601 (0.418)  Acc@1: 95.3125 (86.4151)  Acc@5: 100.0000 (99.3096)
Valid: 30 [ 350/390]  Loss: 0.5191 (0.412)  Acc@1: 84.3750 (86.5340)  Acc@5: 96.8750 (99.3278)
Valid: 30 [ 390/390]  Loss: 0.3904 (0.408)  Acc@1: 82.5000 (86.6760)  Acc@5: 100.0000 (99.3440)
valid_acc 86.676000
epoch = 30   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1453, 0.0785, 0.0633, 0.1075, 0.2301, 0.1329, 0.1221, 0.1203],
        [0.1998, 0.0653, 0.0551, 0.0878, 0.1537, 0.1461, 0.1446, 0.1475],
        [0.1566, 0.0753, 0.0654, 0.1012, 0.1961, 0.1849, 0.1213, 0.0992],
        [0.2700, 0.0626, 0.0571, 0.0866, 0.1214, 0.1201, 0.1539, 0.1283],
        [0.2218, 0.0496, 0.0484, 0.0837, 0.1432, 0.1644, 0.1421, 0.1467],
        [0.2123, 0.0755, 0.0615, 0.0946, 0.1516, 0.1347, 0.1331, 0.1367],
        [0.2629, 0.0620, 0.0526, 0.0801, 0.1468, 0.1434, 0.1191, 0.1331],
        [0.3207, 0.0434, 0.0397, 0.0714, 0.1219, 0.1448, 0.1323, 0.1257],
        [0.3533, 0.0399, 0.0384, 0.0578, 0.1303, 0.1243, 0.1204, 0.1356],
        [0.2702, 0.0756, 0.0657, 0.1016, 0.1164, 0.1159, 0.1299, 0.1247],
        [0.3048, 0.0574, 0.0526, 0.0763, 0.1435, 0.1113, 0.1371, 0.1171],
        [0.3806, 0.0429, 0.0408, 0.0740, 0.1284, 0.1043, 0.1178, 0.1112],
        [0.3561, 0.0409, 0.0396, 0.0584, 0.1291, 0.1132, 0.1168, 0.1460],
        [0.4126, 0.0380, 0.0376, 0.0499, 0.1130, 0.1112, 0.1243, 0.1134]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1114, 0.1395, 0.1190, 0.1262, 0.1357, 0.1164, 0.1240, 0.1279],
        [0.1270, 0.1131, 0.0967, 0.1369, 0.1336, 0.1387, 0.1392, 0.1150],
        [0.1043, 0.1466, 0.1325, 0.1232, 0.1268, 0.1165, 0.1181, 0.1320],
        [0.1285, 0.1171, 0.1077, 0.1207, 0.1301, 0.1521, 0.1355, 0.1082],
        [0.1344, 0.0870, 0.0795, 0.1321, 0.1372, 0.1199, 0.1450, 0.1649],
        [0.1110, 0.1280, 0.1288, 0.1281, 0.1364, 0.1197, 0.1176, 0.1303],
        [0.1252, 0.1206, 0.1171, 0.1266, 0.1348, 0.1355, 0.1273, 0.1129],
        [0.1366, 0.0849, 0.0834, 0.1371, 0.1393, 0.1501, 0.1327, 0.1359],
        [0.1430, 0.0758, 0.0847, 0.1303, 0.1181, 0.1385, 0.1712, 0.1385],
        [0.1010, 0.1324, 0.1219, 0.1425, 0.1295, 0.1352, 0.1326, 0.1051],
        [0.1128, 0.1155, 0.1085, 0.1420, 0.1537, 0.1226, 0.1206, 0.1243],
        [0.1370, 0.0867, 0.0824, 0.1487, 0.1383, 0.1349, 0.1325, 0.1395],
        [0.1489, 0.0750, 0.0796, 0.1307, 0.1255, 0.1517, 0.1535, 0.1350],
        [0.1680, 0.0762, 0.0752, 0.1159, 0.1397, 0.1553, 0.1171, 0.1526]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 31 [   0/390]  Loss: 0.3398 (0.340)  Acc@1: 89.0625 (89.0625)  Acc@5: 100.0000 (100.0000)LR: 8.583e-03
Train: 31 [  50/390]  Loss: 0.2980 (0.305)  Acc@1: 85.9375 (89.4914)  Acc@5: 100.0000 (99.7243)LR: 8.583e-03
Train: 31 [ 100/390]  Loss: 0.3653 (0.294)  Acc@1: 89.0625 (89.8824)  Acc@5: 98.4375 (99.7834)LR: 8.583e-03
Train: 31 [ 150/390]  Loss: 0.2267 (0.288)  Acc@1: 92.1875 (89.9731)  Acc@5: 100.0000 (99.8241)LR: 8.583e-03
Train: 31 [ 200/390]  Loss: 0.4668 (0.288)  Acc@1: 81.2500 (90.0342)  Acc@5: 100.0000 (99.8212)LR: 8.583e-03
Train: 31 [ 250/390]  Loss: 0.2606 (0.291)  Acc@1: 90.6250 (89.8780)  Acc@5: 100.0000 (99.8319)LR: 8.583e-03
Train: 31 [ 300/390]  Loss: 0.3695 (0.291)  Acc@1: 92.1875 (89.9294)  Acc@5: 100.0000 (99.8131)LR: 8.583e-03
Train: 31 [ 350/390]  Loss: 0.2355 (0.292)  Acc@1: 93.7500 (89.8949)  Acc@5: 100.0000 (99.8086)LR: 8.583e-03
Train: 31 [ 390/390]  Loss: 0.2476 (0.292)  Acc@1: 92.5000 (89.8520)  Acc@5: 100.0000 (99.8120)LR: 8.583e-03
train_acc 89.852000
Valid: 31 [   0/390]  Loss: 0.5164 (0.516)  Acc@1: 85.9375 (85.9375)  Acc@5: 98.4375 (98.4375)
Valid: 31 [  50/390]  Loss: 0.4286 (0.436)  Acc@1: 87.5000 (85.7843)  Acc@5: 100.0000 (99.4179)
Valid: 31 [ 100/390]  Loss: 0.4750 (0.431)  Acc@1: 84.3750 (85.6745)  Acc@5: 98.4375 (99.4121)
Valid: 31 [ 150/390]  Loss: 0.3643 (0.432)  Acc@1: 85.9375 (85.4925)  Acc@5: 100.0000 (99.4309)
Valid: 31 [ 200/390]  Loss: 0.6421 (0.434)  Acc@1: 81.2500 (85.4478)  Acc@5: 98.4375 (99.4014)
Valid: 31 [ 250/390]  Loss: 0.4097 (0.430)  Acc@1: 85.9375 (85.6823)  Acc@5: 98.4375 (99.4148)
Valid: 31 [ 300/390]  Loss: 0.2701 (0.429)  Acc@1: 92.1875 (85.6779)  Acc@5: 98.4375 (99.4186)
Valid: 31 [ 350/390]  Loss: 0.6720 (0.427)  Acc@1: 82.8125 (85.8485)  Acc@5: 98.4375 (99.3946)
Valid: 31 [ 390/390]  Loss: 0.4021 (0.427)  Acc@1: 90.0000 (85.7960)  Acc@5: 100.0000 (99.4080)
valid_acc 85.796000
epoch = 31   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('dil_conv_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1454, 0.0771, 0.0629, 0.1087, 0.2303, 0.1344, 0.1228, 0.1184],
        [0.2036, 0.0637, 0.0539, 0.0872, 0.1538, 0.1456, 0.1447, 0.1474],
        [0.1573, 0.0739, 0.0650, 0.1016, 0.1972, 0.1863, 0.1201, 0.0986],
        [0.2787, 0.0613, 0.0564, 0.0865, 0.1208, 0.1190, 0.1508, 0.1266],
        [0.2262, 0.0483, 0.0477, 0.0833, 0.1439, 0.1653, 0.1405, 0.1448],
        [0.2178, 0.0743, 0.0612, 0.0953, 0.1515, 0.1330, 0.1322, 0.1346],
        [0.2718, 0.0604, 0.0516, 0.0796, 0.1460, 0.1419, 0.1166, 0.1321],
        [0.3302, 0.0419, 0.0388, 0.0704, 0.1186, 0.1436, 0.1315, 0.1250],
        [0.3666, 0.0384, 0.0371, 0.0562, 0.1288, 0.1221, 0.1173, 0.1334],
        [0.2753, 0.0741, 0.0652, 0.1020, 0.1162, 0.1149, 0.1288, 0.1235],
        [0.3177, 0.0560, 0.0517, 0.0758, 0.1406, 0.1084, 0.1348, 0.1151],
        [0.3944, 0.0415, 0.0398, 0.0732, 0.1264, 0.1003, 0.1157, 0.1087],
        [0.3713, 0.0393, 0.0383, 0.0571, 0.1244, 0.1115, 0.1145, 0.1435],
        [0.4299, 0.0364, 0.0363, 0.0484, 0.1105, 0.1079, 0.1195, 0.1110]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1116, 0.1393, 0.1186, 0.1261, 0.1352, 0.1176, 0.1241, 0.1275],
        [0.1267, 0.1114, 0.0953, 0.1375, 0.1355, 0.1383, 0.1410, 0.1143],
        [0.1038, 0.1468, 0.1324, 0.1233, 0.1266, 0.1165, 0.1175, 0.1333],
        [0.1294, 0.1160, 0.1072, 0.1223, 0.1293, 0.1528, 0.1355, 0.1074],
        [0.1346, 0.0857, 0.0794, 0.1321, 0.1372, 0.1210, 0.1460, 0.1640],
        [0.1113, 0.1268, 0.1283, 0.1296, 0.1354, 0.1194, 0.1180, 0.1312],
        [0.1253, 0.1190, 0.1159, 0.1270, 0.1362, 0.1364, 0.1273, 0.1130],
        [0.1369, 0.0831, 0.0830, 0.1367, 0.1402, 0.1491, 0.1347, 0.1364],
        [0.1431, 0.0736, 0.0834, 0.1285, 0.1190, 0.1398, 0.1742, 0.1384],
        [0.1004, 0.1319, 0.1214, 0.1437, 0.1293, 0.1353, 0.1329, 0.1051],
        [0.1131, 0.1151, 0.1088, 0.1430, 0.1541, 0.1221, 0.1208, 0.1231],
        [0.1374, 0.0850, 0.0822, 0.1489, 0.1411, 0.1346, 0.1326, 0.1382],
        [0.1494, 0.0734, 0.0791, 0.1309, 0.1253, 0.1530, 0.1542, 0.1347],
        [0.1669, 0.0749, 0.0747, 0.1156, 0.1418, 0.1574, 0.1159, 0.1528]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 32 [   0/390]  Loss: 0.2528 (0.253)  Acc@1: 93.7500 (93.7500)  Acc@5: 100.0000 (100.0000)LR: 7.891e-03
Train: 32 [  50/390]  Loss: 0.1378 (0.281)  Acc@1: 95.3125 (90.1042)  Acc@5: 100.0000 (99.7549)LR: 7.891e-03
Train: 32 [ 100/390]  Loss: 0.3800 (0.281)  Acc@1: 85.9375 (90.2382)  Acc@5: 98.4375 (99.7834)LR: 7.891e-03
Train: 32 [ 150/390]  Loss: 0.5971 (0.289)  Acc@1: 81.2500 (90.0766)  Acc@5: 96.8750 (99.8137)LR: 7.891e-03
Train: 32 [ 200/390]  Loss: 0.2880 (0.294)  Acc@1: 89.0625 (89.8399)  Acc@5: 98.4375 (99.7746)LR: 7.891e-03
Train: 32 [ 250/390]  Loss: 0.2902 (0.294)  Acc@1: 87.5000 (89.8904)  Acc@5: 98.4375 (99.7634)LR: 7.891e-03
Train: 32 [ 300/390]  Loss: 0.2061 (0.294)  Acc@1: 92.1875 (89.8412)  Acc@5: 100.0000 (99.7612)LR: 7.891e-03
Train: 32 [ 350/390]  Loss: 0.2667 (0.294)  Acc@1: 89.0625 (89.7792)  Acc@5: 100.0000 (99.7641)LR: 7.891e-03
Train: 32 [ 390/390]  Loss: 0.3109 (0.293)  Acc@1: 87.5000 (89.8000)  Acc@5: 100.0000 (99.7720)LR: 7.891e-03
train_acc 89.800000
Valid: 32 [   0/390]  Loss: 0.6249 (0.625)  Acc@1: 85.9375 (85.9375)  Acc@5: 98.4375 (98.4375)
Valid: 32 [  50/390]  Loss: 0.4366 (0.384)  Acc@1: 84.3750 (87.6838)  Acc@5: 100.0000 (99.4179)
Valid: 32 [ 100/390]  Loss: 0.3857 (0.401)  Acc@1: 84.3750 (86.6182)  Acc@5: 98.4375 (99.3657)
Valid: 32 [ 150/390]  Loss: 0.4718 (0.388)  Acc@1: 85.9375 (87.0033)  Acc@5: 100.0000 (99.4412)
Valid: 32 [ 200/390]  Loss: 0.3172 (0.386)  Acc@1: 89.0625 (87.1657)  Acc@5: 100.0000 (99.4403)
Valid: 32 [ 250/390]  Loss: 0.2304 (0.389)  Acc@1: 93.7500 (87.1078)  Acc@5: 100.0000 (99.4273)
Valid: 32 [ 300/390]  Loss: 0.4122 (0.384)  Acc@1: 87.5000 (87.1989)  Acc@5: 100.0000 (99.4549)
Valid: 32 [ 350/390]  Loss: 0.5645 (0.384)  Acc@1: 85.9375 (87.1839)  Acc@5: 96.8750 (99.4480)
Valid: 32 [ 390/390]  Loss: 0.1508 (0.383)  Acc@1: 95.0000 (87.1920)  Acc@5: 100.0000 (99.4360)
valid_acc 87.192000
epoch = 32   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('dil_conv_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1457, 0.0754, 0.0620, 0.1086, 0.2344, 0.1336, 0.1229, 0.1174],
        [0.2073, 0.0617, 0.0526, 0.0857, 0.1552, 0.1455, 0.1447, 0.1474],
        [0.1585, 0.0725, 0.0648, 0.1023, 0.1975, 0.1871, 0.1198, 0.0975],
        [0.2871, 0.0592, 0.0553, 0.0851, 0.1197, 0.1176, 0.1497, 0.1263],
        [0.2283, 0.0466, 0.0468, 0.0822, 0.1438, 0.1667, 0.1403, 0.1453],
        [0.2226, 0.0730, 0.0607, 0.0958, 0.1514, 0.1326, 0.1302, 0.1336],
        [0.2821, 0.0585, 0.0505, 0.0784, 0.1450, 0.1401, 0.1150, 0.1304],
        [0.3404, 0.0404, 0.0380, 0.0695, 0.1150, 0.1439, 0.1295, 0.1232],
        [0.3770, 0.0370, 0.0360, 0.0547, 0.1283, 0.1193, 0.1164, 0.1311],
        [0.2825, 0.0731, 0.0647, 0.1025, 0.1154, 0.1149, 0.1260, 0.1210],
        [0.3306, 0.0541, 0.0504, 0.0744, 0.1387, 0.1063, 0.1330, 0.1125],
        [0.4091, 0.0400, 0.0389, 0.0721, 0.1228, 0.0977, 0.1127, 0.1068],
        [0.3836, 0.0380, 0.0373, 0.0559, 0.1212, 0.1097, 0.1138, 0.1405],
        [0.4469, 0.0349, 0.0351, 0.0470, 0.1072, 0.1058, 0.1154, 0.1077]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1110, 0.1392, 0.1187, 0.1262, 0.1357, 0.1189, 0.1236, 0.1267],
        [0.1275, 0.1097, 0.0937, 0.1375, 0.1358, 0.1387, 0.1425, 0.1147],
        [0.1030, 0.1455, 0.1314, 0.1240, 0.1261, 0.1186, 0.1169, 0.1346],
        [0.1304, 0.1139, 0.1053, 0.1227, 0.1296, 0.1541, 0.1362, 0.1078],
        [0.1347, 0.0838, 0.0788, 0.1317, 0.1366, 0.1222, 0.1466, 0.1656],
        [0.1111, 0.1263, 0.1282, 0.1304, 0.1351, 0.1193, 0.1173, 0.1324],
        [0.1260, 0.1185, 0.1153, 0.1268, 0.1352, 0.1366, 0.1287, 0.1129],
        [0.1370, 0.0819, 0.0832, 0.1374, 0.1409, 0.1495, 0.1341, 0.1360],
        [0.1426, 0.0724, 0.0831, 0.1285, 0.1197, 0.1406, 0.1751, 0.1380],
        [0.0995, 0.1312, 0.1211, 0.1443, 0.1289, 0.1366, 0.1341, 0.1043],
        [0.1130, 0.1131, 0.1071, 0.1441, 0.1556, 0.1219, 0.1212, 0.1240],
        [0.1373, 0.0835, 0.0822, 0.1498, 0.1426, 0.1341, 0.1334, 0.1370],
        [0.1490, 0.0722, 0.0790, 0.1316, 0.1252, 0.1533, 0.1556, 0.1341],
        [0.1676, 0.0738, 0.0752, 0.1164, 0.1413, 0.1581, 0.1159, 0.1517]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 33 [   0/390]  Loss: 0.1343 (0.134)  Acc@1: 96.8750 (96.8750)  Acc@5: 100.0000 (100.0000)LR: 7.219e-03
Train: 33 [  50/390]  Loss: 0.2866 (0.288)  Acc@1: 92.1875 (89.7365)  Acc@5: 100.0000 (99.7855)LR: 7.219e-03
Train: 33 [ 100/390]  Loss: 0.2768 (0.280)  Acc@1: 90.6250 (90.2537)  Acc@5: 100.0000 (99.7525)LR: 7.219e-03
Train: 33 [ 150/390]  Loss: 0.4147 (0.287)  Acc@1: 82.8125 (90.0455)  Acc@5: 100.0000 (99.7310)LR: 7.219e-03
Train: 33 [ 200/390]  Loss: 0.3395 (0.285)  Acc@1: 89.0625 (90.0886)  Acc@5: 100.0000 (99.7668)LR: 7.219e-03
Train: 33 [ 250/390]  Loss: 0.4374 (0.284)  Acc@1: 84.3750 (90.1083)  Acc@5: 100.0000 (99.7759)LR: 7.219e-03
Train: 33 [ 300/390]  Loss: 0.3126 (0.285)  Acc@1: 90.6250 (90.0748)  Acc@5: 100.0000 (99.7872)LR: 7.219e-03
Train: 33 [ 350/390]  Loss: 0.1679 (0.290)  Acc@1: 92.1875 (89.8816)  Acc@5: 100.0000 (99.7463)LR: 7.219e-03
Train: 33 [ 390/390]  Loss: 0.2296 (0.289)  Acc@1: 90.0000 (89.9200)  Acc@5: 100.0000 (99.7280)LR: 7.219e-03
train_acc 89.920000
Valid: 33 [   0/390]  Loss: 0.3646 (0.365)  Acc@1: 85.9375 (85.9375)  Acc@5: 100.0000 (100.0000)
Valid: 33 [  50/390]  Loss: 0.1860 (0.414)  Acc@1: 92.1875 (86.4277)  Acc@5: 100.0000 (99.4485)
Valid: 33 [ 100/390]  Loss: 0.2334 (0.398)  Acc@1: 90.6250 (87.1132)  Acc@5: 100.0000 (99.4276)
Valid: 33 [ 150/390]  Loss: 0.2852 (0.386)  Acc@1: 92.1875 (87.4586)  Acc@5: 98.4375 (99.3895)
Valid: 33 [ 200/390]  Loss: 0.3372 (0.382)  Acc@1: 90.6250 (87.4922)  Acc@5: 100.0000 (99.4014)
Valid: 33 [ 250/390]  Loss: 0.7439 (0.385)  Acc@1: 75.0000 (87.3693)  Acc@5: 96.8750 (99.4086)
Valid: 33 [ 300/390]  Loss: 0.3074 (0.387)  Acc@1: 85.9375 (87.1937)  Acc@5: 100.0000 (99.4082)
Valid: 33 [ 350/390]  Loss: 0.5784 (0.388)  Acc@1: 79.6875 (87.1661)  Acc@5: 98.4375 (99.4257)
Valid: 33 [ 390/390]  Loss: 0.2698 (0.388)  Acc@1: 92.5000 (87.1800)  Acc@5: 100.0000 (99.4400)
valid_acc 87.180000
epoch = 33   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('dil_conv_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1469, 0.0732, 0.0608, 0.1077, 0.2394, 0.1342, 0.1223, 0.1154],
        [0.2095, 0.0600, 0.0516, 0.0845, 0.1560, 0.1457, 0.1449, 0.1477],
        [0.1598, 0.0710, 0.0639, 0.1018, 0.1982, 0.1887, 0.1200, 0.0965],
        [0.2950, 0.0579, 0.0547, 0.0845, 0.1186, 0.1156, 0.1487, 0.1250],
        [0.2348, 0.0456, 0.0463, 0.0821, 0.1439, 0.1646, 0.1382, 0.1445],
        [0.2276, 0.0714, 0.0602, 0.0958, 0.1514, 0.1320, 0.1289, 0.1326],
        [0.2920, 0.0572, 0.0502, 0.0781, 0.1439, 0.1366, 0.1132, 0.1288],
        [0.3483, 0.0395, 0.0377, 0.0689, 0.1131, 0.1433, 0.1274, 0.1218],
        [0.3891, 0.0358, 0.0353, 0.0535, 0.1257, 0.1163, 0.1146, 0.1297],
        [0.2904, 0.0715, 0.0639, 0.1021, 0.1150, 0.1131, 0.1246, 0.1194],
        [0.3396, 0.0528, 0.0499, 0.0739, 0.1376, 0.1049, 0.1310, 0.1103],
        [0.4238, 0.0388, 0.0381, 0.0709, 0.1199, 0.0948, 0.1096, 0.1043],
        [0.3960, 0.0367, 0.0365, 0.0547, 0.1181, 0.1075, 0.1120, 0.1384],
        [0.4611, 0.0336, 0.0342, 0.0458, 0.1049, 0.1033, 0.1109, 0.1061]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1105, 0.1385, 0.1186, 0.1255, 0.1361, 0.1195, 0.1243, 0.1271],
        [0.1277, 0.1079, 0.0921, 0.1363, 0.1379, 0.1386, 0.1443, 0.1151],
        [0.1032, 0.1451, 0.1319, 0.1255, 0.1249, 0.1186, 0.1166, 0.1343],
        [0.1310, 0.1127, 0.1041, 0.1234, 0.1295, 0.1564, 0.1364, 0.1066],
        [0.1350, 0.0834, 0.0788, 0.1323, 0.1361, 0.1223, 0.1459, 0.1662],
        [0.1112, 0.1251, 0.1274, 0.1307, 0.1362, 0.1205, 0.1170, 0.1319],
        [0.1264, 0.1175, 0.1146, 0.1271, 0.1351, 0.1371, 0.1296, 0.1125],
        [0.1370, 0.0816, 0.0831, 0.1378, 0.1419, 0.1481, 0.1346, 0.1359],
        [0.1428, 0.0712, 0.0822, 0.1274, 0.1208, 0.1418, 0.1763, 0.1375],
        [0.0992, 0.1303, 0.1205, 0.1460, 0.1296, 0.1366, 0.1337, 0.1040],
        [0.1124, 0.1123, 0.1066, 0.1446, 0.1566, 0.1214, 0.1218, 0.1242],
        [0.1358, 0.0833, 0.0827, 0.1502, 0.1438, 0.1345, 0.1333, 0.1363],
        [0.1498, 0.0715, 0.0793, 0.1326, 0.1260, 0.1521, 0.1558, 0.1328],
        [0.1668, 0.0730, 0.0751, 0.1165, 0.1420, 0.1585, 0.1166, 0.1514]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 34 [   0/390]  Loss: 0.2616 (0.262)  Acc@1: 89.0625 (89.0625)  Acc@5: 100.0000 (100.0000)LR: 6.570e-03
Train: 34 [  50/390]  Loss: 0.4340 (0.263)  Acc@1: 84.3750 (90.4718)  Acc@5: 100.0000 (99.8468)LR: 6.570e-03
Train: 34 [ 100/390]  Loss: 0.3075 (0.266)  Acc@1: 90.6250 (90.7024)  Acc@5: 98.4375 (99.8298)LR: 6.570e-03
Train: 34 [ 150/390]  Loss: 0.1810 (0.263)  Acc@1: 90.6250 (90.8837)  Acc@5: 100.0000 (99.8551)LR: 6.570e-03
Train: 34 [ 200/390]  Loss: 0.3968 (0.265)  Acc@1: 85.9375 (90.7882)  Acc@5: 98.4375 (99.8523)LR: 6.570e-03
Train: 34 [ 250/390]  Loss: 0.2490 (0.268)  Acc@1: 92.1875 (90.7184)  Acc@5: 100.0000 (99.8381)LR: 6.570e-03
Train: 34 [ 300/390]  Loss: 0.2936 (0.268)  Acc@1: 90.6250 (90.7444)  Acc@5: 100.0000 (99.8443)LR: 6.570e-03
Train: 34 [ 350/390]  Loss: 0.1509 (0.271)  Acc@1: 93.7500 (90.5849)  Acc@5: 100.0000 (99.8175)LR: 6.570e-03
Train: 34 [ 390/390]  Loss: 0.1650 (0.274)  Acc@1: 90.0000 (90.5040)  Acc@5: 100.0000 (99.8000)LR: 6.570e-03
train_acc 90.504000
Valid: 34 [   0/390]  Loss: 0.3339 (0.334)  Acc@1: 89.0625 (89.0625)  Acc@5: 98.4375 (98.4375)
Valid: 34 [  50/390]  Loss: 0.2573 (0.405)  Acc@1: 89.0625 (86.3971)  Acc@5: 100.0000 (99.5098)
Valid: 34 [ 100/390]  Loss: 0.3925 (0.398)  Acc@1: 87.5000 (86.6182)  Acc@5: 98.4375 (99.4431)
Valid: 34 [ 150/390]  Loss: 0.7574 (0.394)  Acc@1: 85.9375 (86.8274)  Acc@5: 100.0000 (99.4723)
Valid: 34 [ 200/390]  Loss: 0.4022 (0.397)  Acc@1: 87.5000 (86.7848)  Acc@5: 100.0000 (99.5103)
Valid: 34 [ 250/390]  Loss: 0.4625 (0.396)  Acc@1: 84.3750 (86.8713)  Acc@5: 98.4375 (99.4584)
Valid: 34 [ 300/390]  Loss: 0.3183 (0.398)  Acc@1: 89.0625 (86.9082)  Acc@5: 100.0000 (99.4446)
Valid: 34 [ 350/390]  Loss: 0.2765 (0.396)  Acc@1: 90.6250 (87.0059)  Acc@5: 100.0000 (99.4703)
Valid: 34 [ 390/390]  Loss: 0.4074 (0.396)  Acc@1: 85.0000 (87.1120)  Acc@5: 100.0000 (99.4640)
valid_acc 87.112000
epoch = 34   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('dil_conv_3x3', 1), ('max_pool_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_5x5', 4), ('sep_conv_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1481, 0.0713, 0.0599, 0.1077, 0.2403, 0.1350, 0.1225, 0.1152],
        [0.2131, 0.0584, 0.0506, 0.0835, 0.1568, 0.1451, 0.1443, 0.1481],
        [0.1615, 0.0694, 0.0630, 0.1016, 0.1985, 0.1900, 0.1207, 0.0954],
        [0.3020, 0.0564, 0.0536, 0.0835, 0.1181, 0.1150, 0.1478, 0.1235],
        [0.2417, 0.0446, 0.0457, 0.0822, 0.1440, 0.1616, 0.1364, 0.1439],
        [0.2334, 0.0698, 0.0597, 0.0961, 0.1491, 0.1311, 0.1285, 0.1323],
        [0.3003, 0.0552, 0.0489, 0.0765, 0.1441, 0.1346, 0.1126, 0.1277],
        [0.3586, 0.0382, 0.0369, 0.0679, 0.1105, 0.1428, 0.1251, 0.1200],
        [0.4017, 0.0343, 0.0340, 0.0517, 0.1241, 0.1131, 0.1126, 0.1283],
        [0.2982, 0.0701, 0.0633, 0.1019, 0.1141, 0.1122, 0.1229, 0.1173],
        [0.3475, 0.0517, 0.0492, 0.0732, 0.1380, 0.1028, 0.1293, 0.1084],
        [0.4382, 0.0377, 0.0373, 0.0696, 0.1172, 0.0920, 0.1066, 0.1015],
        [0.4113, 0.0354, 0.0355, 0.0533, 0.1153, 0.1043, 0.1098, 0.1350],
        [0.4768, 0.0325, 0.0333, 0.0448, 0.1014, 0.1006, 0.1073, 0.1032]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1096, 0.1380, 0.1177, 0.1255, 0.1373, 0.1197, 0.1264, 0.1257],
        [0.1286, 0.1073, 0.0916, 0.1355, 0.1388, 0.1387, 0.1454, 0.1141],
        [0.1027, 0.1445, 0.1306, 0.1276, 0.1231, 0.1195, 0.1163, 0.1357],
        [0.1312, 0.1119, 0.1034, 0.1228, 0.1305, 0.1563, 0.1374, 0.1065],
        [0.1349, 0.0815, 0.0779, 0.1323, 0.1363, 0.1225, 0.1468, 0.1678],
        [0.1113, 0.1240, 0.1265, 0.1327, 0.1365, 0.1195, 0.1171, 0.1324],
        [0.1261, 0.1171, 0.1141, 0.1276, 0.1352, 0.1376, 0.1303, 0.1119],
        [0.1372, 0.0803, 0.0823, 0.1381, 0.1428, 0.1473, 0.1354, 0.1367],
        [0.1438, 0.0698, 0.0816, 0.1280, 0.1209, 0.1422, 0.1763, 0.1375],
        [0.0993, 0.1289, 0.1193, 0.1466, 0.1292, 0.1379, 0.1346, 0.1043],
        [0.1117, 0.1118, 0.1060, 0.1452, 0.1575, 0.1218, 0.1215, 0.1246],
        [0.1355, 0.0823, 0.0820, 0.1504, 0.1452, 0.1344, 0.1347, 0.1355],
        [0.1501, 0.0700, 0.0781, 0.1324, 0.1271, 0.1521, 0.1570, 0.1332],
        [0.1673, 0.0721, 0.0743, 0.1167, 0.1441, 0.1591, 0.1166, 0.1498]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 35 [   0/390]  Loss: 0.3170 (0.317)  Acc@1: 87.5000 (87.5000)  Acc@5: 100.0000 (100.0000)LR: 5.947e-03
Train: 35 [  50/390]  Loss: 0.3255 (0.263)  Acc@1: 87.5000 (90.9926)  Acc@5: 100.0000 (99.8162)LR: 5.947e-03
Train: 35 [ 100/390]  Loss: 0.2124 (0.266)  Acc@1: 90.6250 (90.7952)  Acc@5: 100.0000 (99.7989)LR: 5.947e-03
Train: 35 [ 150/390]  Loss: 0.2806 (0.274)  Acc@1: 89.0625 (90.7492)  Acc@5: 100.0000 (99.8137)LR: 5.947e-03
Train: 35 [ 200/390]  Loss: 0.2532 (0.279)  Acc@1: 89.0625 (90.3141)  Acc@5: 100.0000 (99.7979)LR: 5.947e-03
Train: 35 [ 250/390]  Loss: 0.2254 (0.275)  Acc@1: 89.0625 (90.3884)  Acc@5: 100.0000 (99.8070)LR: 5.947e-03
Train: 35 [ 300/390]  Loss: 0.2112 (0.275)  Acc@1: 93.7500 (90.3395)  Acc@5: 100.0000 (99.8183)LR: 5.947e-03
Train: 35 [ 350/390]  Loss: 0.3693 (0.274)  Acc@1: 87.5000 (90.3535)  Acc@5: 100.0000 (99.8175)LR: 5.947e-03
Train: 35 [ 390/390]  Loss: 0.2479 (0.269)  Acc@1: 90.0000 (90.6000)  Acc@5: 100.0000 (99.8280)LR: 5.947e-03
train_acc 90.600000
Valid: 35 [   0/390]  Loss: 0.4830 (0.483)  Acc@1: 84.3750 (84.3750)  Acc@5: 96.8750 (96.8750)
Valid: 35 [  50/390]  Loss: 0.4693 (0.388)  Acc@1: 87.5000 (87.7451)  Acc@5: 98.4375 (99.4485)
Valid: 35 [ 100/390]  Loss: 0.2518 (0.392)  Acc@1: 92.1875 (87.5000)  Acc@5: 100.0000 (99.4431)
Valid: 35 [ 150/390]  Loss: 0.5033 (0.390)  Acc@1: 84.3750 (87.3344)  Acc@5: 100.0000 (99.5033)
Valid: 35 [ 200/390]  Loss: 0.3667 (0.387)  Acc@1: 85.9375 (87.4145)  Acc@5: 98.4375 (99.5025)
Valid: 35 [ 250/390]  Loss: 0.3681 (0.390)  Acc@1: 84.3750 (87.4315)  Acc@5: 100.0000 (99.4771)
Valid: 35 [ 300/390]  Loss: 0.2822 (0.392)  Acc@1: 89.0625 (87.3287)  Acc@5: 100.0000 (99.4498)
Valid: 35 [ 350/390]  Loss: 0.2758 (0.387)  Acc@1: 89.0625 (87.4599)  Acc@5: 100.0000 (99.4881)
Valid: 35 [ 390/390]  Loss: 0.3466 (0.387)  Acc@1: 85.0000 (87.4560)  Acc@5: 100.0000 (99.4920)
valid_acc 87.456000
epoch = 35   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('dil_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_3x3', 1), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1468, 0.0701, 0.0598, 0.1083, 0.2433, 0.1358, 0.1221, 0.1139],
        [0.2200, 0.0566, 0.0495, 0.0822, 0.1570, 0.1443, 0.1423, 0.1481],
        [0.1614, 0.0684, 0.0630, 0.1023, 0.1998, 0.1895, 0.1211, 0.0945],
        [0.3119, 0.0550, 0.0528, 0.0826, 0.1172, 0.1136, 0.1458, 0.1210],
        [0.2493, 0.0434, 0.0451, 0.0819, 0.1432, 0.1606, 0.1343, 0.1423],
        [0.2366, 0.0685, 0.0598, 0.0969, 0.1501, 0.1301, 0.1271, 0.1309],
        [0.3130, 0.0536, 0.0482, 0.0756, 0.1431, 0.1302, 0.1109, 0.1255],
        [0.3701, 0.0370, 0.0363, 0.0669, 0.1078, 0.1429, 0.1220, 0.1171],
        [0.4155, 0.0332, 0.0332, 0.0504, 0.1207, 0.1099, 0.1109, 0.1262],
        [0.3061, 0.0688, 0.0632, 0.1025, 0.1134, 0.1116, 0.1197, 0.1146],
        [0.3617, 0.0502, 0.0485, 0.0724, 0.1355, 0.1003, 0.1269, 0.1044],
        [0.4551, 0.0362, 0.0363, 0.0680, 0.1139, 0.0889, 0.1031, 0.0986],
        [0.4273, 0.0341, 0.0345, 0.0520, 0.1114, 0.1016, 0.1074, 0.1316],
        [0.4948, 0.0313, 0.0323, 0.0435, 0.0982, 0.0971, 0.1030, 0.0998]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1093, 0.1369, 0.1185, 0.1251, 0.1387, 0.1201, 0.1262, 0.1252],
        [0.1281, 0.1058, 0.0908, 0.1361, 0.1393, 0.1387, 0.1474, 0.1138],
        [0.1027, 0.1433, 0.1317, 0.1274, 0.1230, 0.1189, 0.1158, 0.1373],
        [0.1312, 0.1111, 0.1032, 0.1228, 0.1308, 0.1578, 0.1384, 0.1048],
        [0.1351, 0.0801, 0.0784, 0.1332, 0.1350, 0.1228, 0.1466, 0.1688],
        [0.1103, 0.1230, 0.1273, 0.1334, 0.1373, 0.1195, 0.1164, 0.1328],
        [0.1264, 0.1168, 0.1144, 0.1284, 0.1353, 0.1376, 0.1292, 0.1118],
        [0.1369, 0.0796, 0.0833, 0.1399, 0.1421, 0.1456, 0.1358, 0.1367],
        [0.1441, 0.0691, 0.0819, 0.1290, 0.1212, 0.1413, 0.1769, 0.1364],
        [0.0988, 0.1288, 0.1205, 0.1462, 0.1290, 0.1384, 0.1344, 0.1039],
        [0.1112, 0.1119, 0.1069, 0.1462, 0.1579, 0.1214, 0.1209, 0.1236],
        [0.1345, 0.0811, 0.0830, 0.1524, 0.1466, 0.1336, 0.1345, 0.1344],
        [0.1505, 0.0693, 0.0785, 0.1336, 0.1280, 0.1514, 0.1571, 0.1317],
        [0.1696, 0.0717, 0.0749, 0.1178, 0.1427, 0.1569, 0.1166, 0.1500]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 36 [   0/390]  Loss: 0.2261 (0.226)  Acc@1: 93.7500 (93.7500)  Acc@5: 100.0000 (100.0000)LR: 5.351e-03
Train: 36 [  50/390]  Loss: 0.3737 (0.270)  Acc@1: 90.6250 (90.4718)  Acc@5: 100.0000 (99.8468)LR: 5.351e-03
Train: 36 [ 100/390]  Loss: 0.05411 (0.257)  Acc@1: 100.0000 (90.8106)  Acc@5: 100.0000 (99.8298)LR: 5.351e-03
Train: 36 [ 150/390]  Loss: 0.2665 (0.259)  Acc@1: 89.0625 (90.8526)  Acc@5: 100.0000 (99.8137)LR: 5.351e-03
Train: 36 [ 200/390]  Loss: 0.5013 (0.258)  Acc@1: 82.8125 (90.8271)  Acc@5: 98.4375 (99.7901)LR: 5.351e-03
Train: 36 [ 250/390]  Loss: 0.2575 (0.262)  Acc@1: 87.5000 (90.7371)  Acc@5: 100.0000 (99.8132)LR: 5.351e-03
Train: 36 [ 300/390]  Loss: 0.3041 (0.263)  Acc@1: 84.3750 (90.8378)  Acc@5: 100.0000 (99.7924)LR: 5.351e-03
Train: 36 [ 350/390]  Loss: 0.1731 (0.264)  Acc@1: 93.7500 (90.8387)  Acc@5: 100.0000 (99.7997)LR: 5.351e-03
Train: 36 [ 390/390]  Loss: 0.3835 (0.264)  Acc@1: 87.5000 (90.8840)  Acc@5: 100.0000 (99.7960)LR: 5.351e-03
train_acc 90.884000
Valid: 36 [   0/390]  Loss: 0.4717 (0.472)  Acc@1: 84.3750 (84.3750)  Acc@5: 100.0000 (100.0000)
Valid: 36 [  50/390]  Loss: 0.1330 (0.389)  Acc@1: 93.7500 (86.9485)  Acc@5: 100.0000 (99.6017)
Valid: 36 [ 100/390]  Loss: 0.1661 (0.396)  Acc@1: 92.1875 (86.9431)  Acc@5: 100.0000 (99.4740)
Valid: 36 [ 150/390]  Loss: 0.4388 (0.383)  Acc@1: 84.3750 (87.3655)  Acc@5: 98.4375 (99.5033)
Valid: 36 [ 200/390]  Loss: 0.3114 (0.385)  Acc@1: 90.6250 (87.4922)  Acc@5: 100.0000 (99.4869)
Valid: 36 [ 250/390]  Loss: 0.3015 (0.381)  Acc@1: 92.1875 (87.6992)  Acc@5: 98.4375 (99.4771)
Valid: 36 [ 300/390]  Loss: 0.3713 (0.375)  Acc@1: 87.5000 (87.8115)  Acc@5: 98.4375 (99.4965)
Valid: 36 [ 350/390]  Loss: 0.2320 (0.377)  Acc@1: 90.6250 (87.7270)  Acc@5: 100.0000 (99.5014)
Valid: 36 [ 390/390]  Loss: 0.1248 (0.380)  Acc@1: 95.0000 (87.6240)  Acc@5: 100.0000 (99.4960)
valid_acc 87.624000
epoch = 36   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('dil_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_3x3', 1), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1485, 0.0683, 0.0592, 0.1085, 0.2434, 0.1352, 0.1229, 0.1140],
        [0.2224, 0.0552, 0.0486, 0.0813, 0.1584, 0.1446, 0.1421, 0.1475],
        [0.1637, 0.0668, 0.0625, 0.1025, 0.1992, 0.1901, 0.1205, 0.0946],
        [0.3200, 0.0539, 0.0523, 0.0822, 0.1160, 0.1121, 0.1448, 0.1187],
        [0.2558, 0.0423, 0.0444, 0.0816, 0.1424, 0.1586, 0.1320, 0.1429],
        [0.2436, 0.0672, 0.0595, 0.0974, 0.1484, 0.1297, 0.1245, 0.1298],
        [0.3227, 0.0523, 0.0475, 0.0750, 0.1423, 0.1268, 0.1094, 0.1240],
        [0.3820, 0.0358, 0.0356, 0.0659, 0.1055, 0.1413, 0.1192, 0.1147],
        [0.4280, 0.0322, 0.0325, 0.0494, 0.1177, 0.1076, 0.1087, 0.1239],
        [0.3169, 0.0670, 0.0623, 0.1020, 0.1116, 0.1097, 0.1175, 0.1130],
        [0.3720, 0.0489, 0.0476, 0.0714, 0.1335, 0.0992, 0.1252, 0.1021],
        [0.4723, 0.0348, 0.0350, 0.0659, 0.1111, 0.0856, 0.0994, 0.0958],
        [0.4423, 0.0330, 0.0336, 0.0507, 0.1073, 0.0992, 0.1056, 0.1284],
        [0.5132, 0.0299, 0.0311, 0.0418, 0.0945, 0.0942, 0.0989, 0.0963]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1095, 0.1351, 0.1178, 0.1253, 0.1387, 0.1206, 0.1286, 0.1244],
        [0.1274, 0.1043, 0.0894, 0.1362, 0.1404, 0.1386, 0.1492, 0.1145],
        [0.1025, 0.1427, 0.1317, 0.1284, 0.1219, 0.1199, 0.1160, 0.1368],
        [0.1310, 0.1100, 0.1022, 0.1236, 0.1311, 0.1586, 0.1388, 0.1047],
        [0.1343, 0.0792, 0.0785, 0.1340, 0.1358, 0.1219, 0.1462, 0.1700],
        [0.1105, 0.1225, 0.1276, 0.1350, 0.1358, 0.1191, 0.1162, 0.1334],
        [0.1269, 0.1160, 0.1138, 0.1291, 0.1351, 0.1381, 0.1291, 0.1120],
        [0.1375, 0.0785, 0.0836, 0.1414, 0.1424, 0.1451, 0.1356, 0.1359],
        [0.1436, 0.0677, 0.0814, 0.1280, 0.1218, 0.1417, 0.1793, 0.1365],
        [0.0987, 0.1290, 0.1208, 0.1467, 0.1295, 0.1381, 0.1338, 0.1033],
        [0.1117, 0.1119, 0.1067, 0.1463, 0.1589, 0.1205, 0.1204, 0.1237],
        [0.1342, 0.0807, 0.0833, 0.1538, 0.1442, 0.1342, 0.1345, 0.1351],
        [0.1503, 0.0684, 0.0780, 0.1331, 0.1282, 0.1520, 0.1583, 0.1318],
        [0.1701, 0.0710, 0.0749, 0.1178, 0.1444, 0.1567, 0.1157, 0.1495]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 37 [   0/390]  Loss: 0.2842 (0.284)  Acc@1: 85.9375 (85.9375)  Acc@5: 100.0000 (100.0000)LR: 4.785e-03
Train: 37 [  50/390]  Loss: 0.2182 (0.274)  Acc@1: 89.0625 (90.2267)  Acc@5: 100.0000 (99.7549)LR: 4.785e-03
Train: 37 [ 100/390]  Loss: 0.2630 (0.275)  Acc@1: 87.5000 (90.2537)  Acc@5: 100.0000 (99.7989)LR: 4.785e-03
Train: 37 [ 150/390]  Loss: 0.1596 (0.271)  Acc@1: 93.7500 (90.3353)  Acc@5: 100.0000 (99.8137)LR: 4.785e-03
Train: 37 [ 200/390]  Loss: 0.3592 (0.272)  Acc@1: 90.6250 (90.4073)  Acc@5: 98.4375 (99.8134)LR: 4.785e-03
Train: 37 [ 250/390]  Loss: 0.2226 (0.273)  Acc@1: 92.1875 (90.4133)  Acc@5: 98.4375 (99.8195)LR: 4.785e-03
Train: 37 [ 300/390]  Loss: 0.3476 (0.274)  Acc@1: 89.0625 (90.4797)  Acc@5: 100.0000 (99.8183)LR: 4.785e-03
Train: 37 [ 350/390]  Loss: 0.2160 (0.271)  Acc@1: 93.7500 (90.5449)  Acc@5: 100.0000 (99.8264)LR: 4.785e-03
Train: 37 [ 390/390]  Loss: 0.08272 (0.270)  Acc@1: 97.5000 (90.5760)  Acc@5: 100.0000 (99.8280)LR: 4.785e-03
train_acc 90.576000
Valid: 37 [   0/390]  Loss: 0.5073 (0.507)  Acc@1: 82.8125 (82.8125)  Acc@5: 98.4375 (98.4375)
Valid: 37 [  50/390]  Loss: 0.2661 (0.402)  Acc@1: 87.5000 (86.7953)  Acc@5: 100.0000 (99.4485)
Valid: 37 [ 100/390]  Loss: 0.5790 (0.387)  Acc@1: 85.9375 (87.1906)  Acc@5: 100.0000 (99.5359)
Valid: 37 [ 150/390]  Loss: 0.4802 (0.374)  Acc@1: 82.8125 (87.6966)  Acc@5: 98.4375 (99.5550)
Valid: 37 [ 200/390]  Loss: 0.4367 (0.384)  Acc@1: 84.3750 (87.3989)  Acc@5: 100.0000 (99.5569)
Valid: 37 [ 250/390]  Loss: 0.2308 (0.382)  Acc@1: 95.3125 (87.4813)  Acc@5: 98.4375 (99.5331)
Valid: 37 [ 300/390]  Loss: 0.2420 (0.383)  Acc@1: 90.6250 (87.4585)  Acc@5: 100.0000 (99.5172)
Valid: 37 [ 350/390]  Loss: 0.2665 (0.386)  Acc@1: 90.6250 (87.4288)  Acc@5: 100.0000 (99.5059)
Valid: 37 [ 390/390]  Loss: 0.2522 (0.389)  Acc@1: 87.5000 (87.4200)  Acc@5: 100.0000 (99.5000)
valid_acc 87.420000
epoch = 37   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('dil_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_5x5', 2), ('sep_conv_3x3', 1), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1489, 0.0666, 0.0583, 0.1080, 0.2452, 0.1346, 0.1235, 0.1150],
        [0.2261, 0.0534, 0.0474, 0.0796, 0.1595, 0.1446, 0.1418, 0.1476],
        [0.1651, 0.0654, 0.0621, 0.1031, 0.1980, 0.1906, 0.1207, 0.0950],
        [0.3279, 0.0523, 0.0512, 0.0809, 0.1149, 0.1121, 0.1432, 0.1176],
        [0.2612, 0.0413, 0.0439, 0.0813, 0.1436, 0.1570, 0.1303, 0.1414],
        [0.2493, 0.0662, 0.0591, 0.0979, 0.1466, 0.1289, 0.1229, 0.1292],
        [0.3333, 0.0509, 0.0466, 0.0740, 0.1403, 0.1235, 0.1088, 0.1226],
        [0.3931, 0.0349, 0.0350, 0.0653, 0.1025, 0.1396, 0.1174, 0.1121],
        [0.4389, 0.0313, 0.0318, 0.0484, 0.1159, 0.1046, 0.1054, 0.1238],
        [0.3244, 0.0660, 0.0617, 0.1021, 0.1105, 0.1089, 0.1161, 0.1104],
        [0.3851, 0.0479, 0.0469, 0.0706, 0.1303, 0.0968, 0.1227, 0.0998],
        [0.4880, 0.0338, 0.0343, 0.0648, 0.1073, 0.0830, 0.0958, 0.0930],
        [0.4546, 0.0320, 0.0329, 0.0499, 0.1047, 0.0969, 0.1030, 0.1261],
        [0.5299, 0.0288, 0.0302, 0.0408, 0.0914, 0.0905, 0.0950, 0.0933]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1086, 0.1351, 0.1182, 0.1246, 0.1398, 0.1209, 0.1295, 0.1234],
        [0.1276, 0.1026, 0.0879, 0.1352, 0.1425, 0.1392, 0.1490, 0.1159],
        [0.1020, 0.1433, 0.1330, 0.1279, 0.1209, 0.1195, 0.1157, 0.1376],
        [0.1312, 0.1082, 0.1007, 0.1238, 0.1309, 0.1611, 0.1395, 0.1045],
        [0.1353, 0.0781, 0.0780, 0.1348, 0.1368, 0.1211, 0.1456, 0.1702],
        [0.1107, 0.1225, 0.1285, 0.1361, 0.1357, 0.1186, 0.1146, 0.1333],
        [0.1267, 0.1147, 0.1130, 0.1290, 0.1352, 0.1388, 0.1296, 0.1130],
        [0.1376, 0.0776, 0.0833, 0.1425, 0.1428, 0.1434, 0.1363, 0.1365],
        [0.1444, 0.0662, 0.0806, 0.1276, 0.1231, 0.1413, 0.1803, 0.1364],
        [0.0982, 0.1297, 0.1221, 0.1475, 0.1286, 0.1363, 0.1349, 0.1026],
        [0.1115, 0.1108, 0.1061, 0.1459, 0.1600, 0.1209, 0.1209, 0.1237],
        [0.1328, 0.0798, 0.0831, 0.1549, 0.1450, 0.1335, 0.1349, 0.1360],
        [0.1507, 0.0672, 0.0776, 0.1335, 0.1286, 0.1529, 0.1571, 0.1325],
        [0.1715, 0.0698, 0.0744, 0.1183, 0.1445, 0.1563, 0.1159, 0.1493]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 38 [   0/390]  Loss: 0.2643 (0.264)  Acc@1: 90.6250 (90.6250)  Acc@5: 100.0000 (100.0000)LR: 4.252e-03
Train: 38 [  50/390]  Loss: 0.2599 (0.256)  Acc@1: 89.0625 (91.2377)  Acc@5: 100.0000 (99.8468)LR: 4.252e-03
Train: 38 [ 100/390]  Loss: 0.3158 (0.256)  Acc@1: 89.0625 (90.9499)  Acc@5: 100.0000 (99.8144)LR: 4.252e-03
Train: 38 [ 150/390]  Loss: 0.3078 (0.256)  Acc@1: 85.9375 (91.1527)  Acc@5: 100.0000 (99.7827)LR: 4.252e-03
Train: 38 [ 200/390]  Loss: 0.1307 (0.254)  Acc@1: 93.7500 (91.1925)  Acc@5: 100.0000 (99.7746)LR: 4.252e-03
Train: 38 [ 250/390]  Loss: 0.3361 (0.256)  Acc@1: 90.6250 (91.2413)  Acc@5: 100.0000 (99.7697)LR: 4.252e-03
Train: 38 [ 300/390]  Loss: 0.1559 (0.259)  Acc@1: 93.7500 (91.1078)  Acc@5: 100.0000 (99.7560)LR: 4.252e-03
Train: 38 [ 350/390]  Loss: 0.4060 (0.259)  Acc@1: 85.9375 (91.0746)  Acc@5: 98.4375 (99.7685)LR: 4.252e-03
Train: 38 [ 390/390]  Loss: 0.07857 (0.258)  Acc@1: 100.0000 (91.0800)  Acc@5: 100.0000 (99.7760)LR: 4.252e-03
train_acc 91.080000
Valid: 38 [   0/390]  Loss: 0.4069 (0.407)  Acc@1: 85.9375 (85.9375)  Acc@5: 100.0000 (100.0000)
Valid: 38 [  50/390]  Loss: 0.3660 (0.383)  Acc@1: 87.5000 (87.5613)  Acc@5: 100.0000 (99.5098)
Valid: 38 [ 100/390]  Loss: 0.2380 (0.378)  Acc@1: 93.7500 (87.5464)  Acc@5: 100.0000 (99.5823)
Valid: 38 [ 150/390]  Loss: 0.2869 (0.380)  Acc@1: 85.9375 (87.6242)  Acc@5: 100.0000 (99.5550)
Valid: 38 [ 200/390]  Loss: 0.3289 (0.373)  Acc@1: 89.0625 (87.8343)  Acc@5: 100.0000 (99.5336)
Valid: 38 [ 250/390]  Loss: 0.4620 (0.374)  Acc@1: 85.9375 (87.7864)  Acc@5: 100.0000 (99.5331)
Valid: 38 [ 300/390]  Loss: 0.1626 (0.373)  Acc@1: 95.3125 (87.8374)  Acc@5: 100.0000 (99.5172)
Valid: 38 [ 350/390]  Loss: 0.3000 (0.371)  Acc@1: 96.8750 (87.9363)  Acc@5: 98.4375 (99.5504)
Valid: 38 [ 390/390]  Loss: 0.1072 (0.370)  Acc@1: 95.0000 (87.8640)  Acc@5: 100.0000 (99.5360)
valid_acc 87.864000
epoch = 38   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('dil_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('skip_connect', 2), ('sep_conv_3x3', 1), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1491, 0.0655, 0.0577, 0.1078, 0.2466, 0.1344, 0.1235, 0.1153],
        [0.2298, 0.0522, 0.0465, 0.0784, 0.1598, 0.1441, 0.1415, 0.1477],
        [0.1659, 0.0644, 0.0616, 0.1033, 0.1974, 0.1929, 0.1203, 0.0942],
        [0.3382, 0.0510, 0.0504, 0.0800, 0.1135, 0.1098, 0.1418, 0.1153],
        [0.2675, 0.0405, 0.0434, 0.0810, 0.1435, 0.1567, 0.1274, 0.1399],
        [0.2553, 0.0652, 0.0583, 0.0972, 0.1454, 0.1280, 0.1225, 0.1281],
        [0.3420, 0.0498, 0.0458, 0.0730, 0.1394, 0.1219, 0.1076, 0.1205],
        [0.4029, 0.0343, 0.0346, 0.0645, 0.0994, 0.1394, 0.1144, 0.1105],
        [0.4505, 0.0304, 0.0311, 0.0473, 0.1137, 0.1015, 0.1038, 0.1217],
        [0.3330, 0.0647, 0.0608, 0.1012, 0.1090, 0.1082, 0.1149, 0.1082],
        [0.3970, 0.0467, 0.0460, 0.0697, 0.1279, 0.0947, 0.1207, 0.0974],
        [0.5020, 0.0328, 0.0335, 0.0633, 0.1050, 0.0809, 0.0923, 0.0902],
        [0.4693, 0.0312, 0.0322, 0.0490, 0.1011, 0.0933, 0.1007, 0.1233],
        [0.5457, 0.0278, 0.0294, 0.0396, 0.0887, 0.0871, 0.0914, 0.0903]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1086, 0.1347, 0.1179, 0.1249, 0.1406, 0.1212, 0.1293, 0.1228],
        [0.1270, 0.1014, 0.0871, 0.1358, 0.1448, 0.1382, 0.1499, 0.1158],
        [0.1021, 0.1438, 0.1329, 0.1283, 0.1211, 0.1195, 0.1148, 0.1375],
        [0.1309, 0.1072, 0.1001, 0.1248, 0.1311, 0.1625, 0.1392, 0.1040],
        [0.1361, 0.0782, 0.0784, 0.1363, 0.1360, 0.1204, 0.1445, 0.1700],
        [0.1107, 0.1217, 0.1280, 0.1374, 0.1359, 0.1180, 0.1148, 0.1335],
        [0.1252, 0.1137, 0.1124, 0.1291, 0.1364, 0.1404, 0.1301, 0.1127],
        [0.1372, 0.0772, 0.0832, 0.1429, 0.1428, 0.1428, 0.1365, 0.1373],
        [0.1449, 0.0653, 0.0803, 0.1267, 0.1236, 0.1412, 0.1819, 0.1360],
        [0.0987, 0.1293, 0.1222, 0.1482, 0.1283, 0.1360, 0.1344, 0.1029],
        [0.1115, 0.1106, 0.1064, 0.1465, 0.1596, 0.1213, 0.1200, 0.1242],
        [0.1338, 0.0795, 0.0835, 0.1568, 0.1432, 0.1327, 0.1353, 0.1352],
        [0.1508, 0.0663, 0.0774, 0.1334, 0.1285, 0.1531, 0.1585, 0.1320],
        [0.1709, 0.0697, 0.0749, 0.1193, 0.1450, 0.1569, 0.1156, 0.1477]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 39 [   0/390]  Loss: 0.1828 (0.183)  Acc@1: 93.7500 (93.7500)  Acc@5: 100.0000 (100.0000)LR: 3.754e-03
Train: 39 [  50/390]  Loss: 0.3957 (0.255)  Acc@1: 84.3750 (91.0539)  Acc@5: 98.4375 (99.7549)LR: 3.754e-03
Train: 39 [ 100/390]  Loss: 0.3553 (0.255)  Acc@1: 87.5000 (91.1665)  Acc@5: 100.0000 (99.7679)LR: 3.754e-03
Train: 39 [ 150/390]  Loss: 0.2849 (0.254)  Acc@1: 89.0625 (91.2769)  Acc@5: 100.0000 (99.7724)LR: 3.754e-03
Train: 39 [ 200/390]  Loss: 0.1258 (0.253)  Acc@1: 96.8750 (91.3402)  Acc@5: 100.0000 (99.7979)LR: 3.754e-03
Train: 39 [ 250/390]  Loss: 0.1721 (0.251)  Acc@1: 92.1875 (91.4094)  Acc@5: 100.0000 (99.8257)LR: 3.754e-03
Train: 39 [ 300/390]  Loss: 0.3412 (0.251)  Acc@1: 90.6250 (91.3673)  Acc@5: 96.8750 (99.8339)LR: 3.754e-03
Train: 39 [ 350/390]  Loss: 0.3475 (0.251)  Acc@1: 89.0625 (91.3506)  Acc@5: 98.4375 (99.8264)LR: 3.754e-03
Train: 39 [ 390/390]  Loss: 0.1833 (0.252)  Acc@1: 95.0000 (91.3000)  Acc@5: 100.0000 (99.8240)LR: 3.754e-03
train_acc 91.300000
Valid: 39 [   0/390]  Loss: 0.5312 (0.531)  Acc@1: 81.2500 (81.2500)  Acc@5: 100.0000 (100.0000)
Valid: 39 [  50/390]  Loss: 0.6805 (0.379)  Acc@1: 76.5625 (87.5919)  Acc@5: 100.0000 (99.5098)
Valid: 39 [ 100/390]  Loss: 0.4387 (0.380)  Acc@1: 85.9375 (87.3608)  Acc@5: 100.0000 (99.5359)
Valid: 39 [ 150/390]  Loss: 0.2307 (0.370)  Acc@1: 90.6250 (87.6449)  Acc@5: 100.0000 (99.5033)
Valid: 39 [ 200/390]  Loss: 0.2999 (0.366)  Acc@1: 89.0625 (87.8576)  Acc@5: 98.4375 (99.4947)
Valid: 39 [ 250/390]  Loss: 0.2384 (0.368)  Acc@1: 89.0625 (87.8299)  Acc@5: 100.0000 (99.4833)
Valid: 39 [ 300/390]  Loss: 0.3001 (0.366)  Acc@1: 90.6250 (87.9049)  Acc@5: 100.0000 (99.4705)
Valid: 39 [ 350/390]  Loss: 0.1347 (0.369)  Acc@1: 96.8750 (87.8027)  Acc@5: 100.0000 (99.4480)
Valid: 39 [ 390/390]  Loss: 0.4542 (0.368)  Acc@1: 82.5000 (87.8040)  Acc@5: 100.0000 (99.4640)
valid_acc 87.804000
epoch = 39   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('dil_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_3x3', 2), ('sep_conv_3x3', 1), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1502, 0.0644, 0.0574, 0.1082, 0.2482, 0.1333, 0.1228, 0.1156],
        [0.2324, 0.0511, 0.0459, 0.0774, 0.1606, 0.1431, 0.1409, 0.1485],
        [0.1684, 0.0633, 0.0613, 0.1034, 0.1961, 0.1924, 0.1212, 0.0939],
        [0.3469, 0.0500, 0.0497, 0.0788, 0.1118, 0.1095, 0.1402, 0.1132],
        [0.2714, 0.0397, 0.0427, 0.0799, 0.1439, 0.1565, 0.1261, 0.1398],
        [0.2626, 0.0637, 0.0576, 0.0969, 0.1442, 0.1273, 0.1208, 0.1268],
        [0.3519, 0.0486, 0.0449, 0.0719, 0.1378, 0.1194, 0.1065, 0.1190],
        [0.4146, 0.0332, 0.0338, 0.0631, 0.0961, 0.1380, 0.1118, 0.1095],
        [0.4623, 0.0295, 0.0302, 0.0461, 0.1120, 0.0984, 0.1018, 0.1196],
        [0.3433, 0.0630, 0.0600, 0.1009, 0.1072, 0.1050, 0.1143, 0.1063],
        [0.4094, 0.0456, 0.0453, 0.0687, 0.1270, 0.0919, 0.1172, 0.0948],
        [0.5164, 0.0316, 0.0324, 0.0611, 0.1021, 0.0789, 0.0889, 0.0886],
        [0.4838, 0.0302, 0.0314, 0.0477, 0.0975, 0.0917, 0.0983, 0.1195],
        [0.5609, 0.0271, 0.0287, 0.0387, 0.0851, 0.0841, 0.0878, 0.0877]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1086, 0.1342, 0.1180, 0.1254, 0.1406, 0.1217, 0.1298, 0.1217],
        [0.1266, 0.1000, 0.0862, 0.1352, 0.1477, 0.1381, 0.1510, 0.1152],
        [0.1023, 0.1437, 0.1335, 0.1281, 0.1210, 0.1196, 0.1143, 0.1374],
        [0.1319, 0.1067, 0.0998, 0.1245, 0.1306, 0.1633, 0.1395, 0.1036],
        [0.1358, 0.0774, 0.0778, 0.1361, 0.1360, 0.1197, 0.1447, 0.1725],
        [0.1110, 0.1213, 0.1280, 0.1385, 0.1360, 0.1169, 0.1149, 0.1335],
        [0.1256, 0.1132, 0.1123, 0.1296, 0.1360, 0.1406, 0.1301, 0.1125],
        [0.1366, 0.0762, 0.0823, 0.1420, 0.1442, 0.1422, 0.1379, 0.1387],
        [0.1454, 0.0641, 0.0797, 0.1257, 0.1243, 0.1408, 0.1833, 0.1368],
        [0.0983, 0.1284, 0.1222, 0.1497, 0.1279, 0.1368, 0.1344, 0.1023],
        [0.1116, 0.1101, 0.1060, 0.1461, 0.1607, 0.1215, 0.1195, 0.1246],
        [0.1332, 0.0789, 0.0831, 0.1568, 0.1451, 0.1330, 0.1356, 0.1344],
        [0.1506, 0.0654, 0.0774, 0.1336, 0.1291, 0.1533, 0.1588, 0.1319],
        [0.1703, 0.0688, 0.0744, 0.1193, 0.1470, 0.1560, 0.1167, 0.1475]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 40 [   0/390]  Loss: 0.2056 (0.206)  Acc@1: 95.3125 (95.3125)  Acc@5: 100.0000 (100.0000)LR: 3.292e-03
Train: 40 [  50/390]  Loss: 0.09841 (0.234)  Acc@1: 98.4375 (91.9424)  Acc@5: 100.0000 (99.9387)LR: 3.292e-03
Train: 40 [ 100/390]  Loss: 0.3358 (0.232)  Acc@1: 90.6250 (91.9864)  Acc@5: 100.0000 (99.9072)LR: 3.292e-03
Train: 40 [ 150/390]  Loss: 0.5711 (0.238)  Acc@1: 81.2500 (91.6701)  Acc@5: 100.0000 (99.8965)LR: 3.292e-03
Train: 40 [ 200/390]  Loss: 0.2939 (0.243)  Acc@1: 87.5000 (91.5423)  Acc@5: 100.0000 (99.8678)LR: 3.292e-03
Train: 40 [ 250/390]  Loss: 0.2827 (0.243)  Acc@1: 89.0625 (91.5837)  Acc@5: 100.0000 (99.8693)LR: 3.292e-03
Train: 40 [ 300/390]  Loss: 0.05092 (0.246)  Acc@1: 100.0000 (91.5075)  Acc@5: 100.0000 (99.8598)LR: 3.292e-03
Train: 40 [ 350/390]  Loss: 0.2801 (0.244)  Acc@1: 89.0625 (91.5109)  Acc@5: 100.0000 (99.8575)LR: 3.292e-03
Train: 40 [ 390/390]  Loss: 0.1642 (0.244)  Acc@1: 92.5000 (91.5360)  Acc@5: 100.0000 (99.8480)LR: 3.292e-03
train_acc 91.536000
Valid: 40 [   0/390]  Loss: 0.3070 (0.307)  Acc@1: 90.6250 (90.6250)  Acc@5: 98.4375 (98.4375)
Valid: 40 [  50/390]  Loss: 0.1995 (0.370)  Acc@1: 90.6250 (87.1630)  Acc@5: 100.0000 (99.2953)
Valid: 40 [ 100/390]  Loss: 0.1673 (0.365)  Acc@1: 93.7500 (87.5928)  Acc@5: 100.0000 (99.5050)
Valid: 40 [ 150/390]  Loss: 0.3572 (0.369)  Acc@1: 92.1875 (87.7794)  Acc@5: 98.4375 (99.5344)
Valid: 40 [ 200/390]  Loss: 0.3298 (0.371)  Acc@1: 87.5000 (87.7332)  Acc@5: 100.0000 (99.5103)
Valid: 40 [ 250/390]  Loss: 0.6325 (0.370)  Acc@1: 84.3750 (87.8922)  Acc@5: 95.3125 (99.4522)
Valid: 40 [ 300/390]  Loss: 0.4242 (0.369)  Acc@1: 81.2500 (87.8893)  Acc@5: 98.4375 (99.4705)
Valid: 40 [ 350/390]  Loss: 0.1762 (0.368)  Acc@1: 93.7500 (87.9274)  Acc@5: 100.0000 (99.4747)
Valid: 40 [ 390/390]  Loss: 0.1573 (0.369)  Acc@1: 95.0000 (87.9120)  Acc@5: 100.0000 (99.4800)
valid_acc 87.912000
epoch = 40   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('dil_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_3x3', 2), ('dil_conv_3x3', 3), ('sep_conv_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1506, 0.0634, 0.0568, 0.1085, 0.2479, 0.1341, 0.1230, 0.1157],
        [0.2369, 0.0499, 0.0449, 0.0759, 0.1597, 0.1438, 0.1410, 0.1479],
        [0.1693, 0.0627, 0.0610, 0.1040, 0.1966, 0.1921, 0.1214, 0.0930],
        [0.3564, 0.0489, 0.0488, 0.0778, 0.1104, 0.1075, 0.1392, 0.1110],
        [0.2788, 0.0388, 0.0420, 0.0794, 0.1447, 0.1541, 0.1244, 0.1378],
        [0.2677, 0.0626, 0.0570, 0.0966, 0.1439, 0.1267, 0.1194, 0.1261],
        [0.3618, 0.0478, 0.0443, 0.0712, 0.1360, 0.1172, 0.1045, 0.1172],
        [0.4263, 0.0324, 0.0330, 0.0620, 0.0938, 0.1350, 0.1096, 0.1080],
        [0.4753, 0.0288, 0.0297, 0.0454, 0.1094, 0.0953, 0.0993, 0.1168],
        [0.3517, 0.0618, 0.0592, 0.1006, 0.1054, 0.1038, 0.1129, 0.1045],
        [0.4197, 0.0446, 0.0444, 0.0677, 0.1260, 0.0899, 0.1163, 0.0914],
        [0.5303, 0.0307, 0.0316, 0.0596, 0.0994, 0.0769, 0.0859, 0.0856],
        [0.4959, 0.0292, 0.0306, 0.0466, 0.0943, 0.0904, 0.0970, 0.1161],
        [0.5770, 0.0262, 0.0279, 0.0378, 0.0813, 0.0812, 0.0840, 0.0845]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1086, 0.1330, 0.1174, 0.1249, 0.1413, 0.1228, 0.1312, 0.1210],
        [0.1262, 0.0989, 0.0848, 0.1353, 0.1489, 0.1384, 0.1528, 0.1148],
        [0.1024, 0.1424, 0.1328, 0.1292, 0.1213, 0.1197, 0.1146, 0.1376],
        [0.1325, 0.1050, 0.0982, 0.1254, 0.1302, 0.1643, 0.1407, 0.1038],
        [0.1345, 0.0767, 0.0776, 0.1359, 0.1366, 0.1189, 0.1451, 0.1747],
        [0.1117, 0.1203, 0.1270, 0.1406, 0.1354, 0.1165, 0.1146, 0.1338],
        [0.1263, 0.1121, 0.1111, 0.1312, 0.1361, 0.1396, 0.1308, 0.1129],
        [0.1344, 0.0750, 0.0815, 0.1408, 0.1463, 0.1427, 0.1385, 0.1410],
        [0.1450, 0.0633, 0.0793, 0.1254, 0.1250, 0.1419, 0.1833, 0.1368],
        [0.0995, 0.1271, 0.1211, 0.1505, 0.1286, 0.1358, 0.1362, 0.1013],
        [0.1120, 0.1094, 0.1055, 0.1468, 0.1606, 0.1216, 0.1203, 0.1239],
        [0.1330, 0.0778, 0.0827, 0.1564, 0.1452, 0.1338, 0.1365, 0.1346],
        [0.1497, 0.0642, 0.0766, 0.1322, 0.1305, 0.1542, 0.1607, 0.1320],
        [0.1701, 0.0677, 0.0739, 0.1185, 0.1482, 0.1570, 0.1175, 0.1471]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 41 [   0/390]  Loss: 0.1748 (0.175)  Acc@1: 95.3125 (95.3125)  Acc@5: 100.0000 (100.0000)LR: 2.868e-03
Train: 41 [  50/390]  Loss: 0.3314 (0.259)  Acc@1: 89.0625 (91.5441)  Acc@5: 100.0000 (99.7549)LR: 2.868e-03
Train: 41 [ 100/390]  Loss: 0.2766 (0.246)  Acc@1: 87.5000 (91.5377)  Acc@5: 100.0000 (99.7834)LR: 2.868e-03
Train: 41 [ 150/390]  Loss: 0.2303 (0.244)  Acc@1: 92.1875 (91.5873)  Acc@5: 100.0000 (99.8448)LR: 2.868e-03
Train: 41 [ 200/390]  Loss: 0.3467 (0.242)  Acc@1: 81.2500 (91.4723)  Acc@5: 100.0000 (99.8756)LR: 2.868e-03
Train: 41 [ 250/390]  Loss: 0.1440 (0.249)  Acc@1: 92.1875 (91.2351)  Acc@5: 100.0000 (99.8630)LR: 2.868e-03
Train: 41 [ 300/390]  Loss: 0.1704 (0.248)  Acc@1: 95.3125 (91.3258)  Acc@5: 100.0000 (99.8391)LR: 2.868e-03
Train: 41 [ 350/390]  Loss: 0.4637 (0.245)  Acc@1: 85.9375 (91.3640)  Acc@5: 100.0000 (99.8486)LR: 2.868e-03
Train: 41 [ 390/390]  Loss: 0.4046 (0.244)  Acc@1: 87.5000 (91.3960)  Acc@5: 97.5000 (99.8440)LR: 2.868e-03
train_acc 91.396000
Valid: 41 [   0/390]  Loss: 0.4148 (0.415)  Acc@1: 84.3750 (84.3750)  Acc@5: 98.4375 (98.4375)
Valid: 41 [  50/390]  Loss: 0.4803 (0.372)  Acc@1: 84.3750 (87.0711)  Acc@5: 98.4375 (99.5711)
Valid: 41 [ 100/390]  Loss: 0.3368 (0.383)  Acc@1: 87.5000 (86.9740)  Acc@5: 100.0000 (99.5978)
Valid: 41 [ 150/390]  Loss: 0.2689 (0.388)  Acc@1: 87.5000 (86.9619)  Acc@5: 100.0000 (99.4723)
Valid: 41 [ 200/390]  Loss: 0.3488 (0.381)  Acc@1: 84.3750 (87.2901)  Acc@5: 100.0000 (99.4792)
Valid: 41 [ 250/390]  Loss: 0.2047 (0.376)  Acc@1: 89.0625 (87.4440)  Acc@5: 100.0000 (99.4833)
Valid: 41 [ 300/390]  Loss: 0.3557 (0.381)  Acc@1: 87.5000 (87.3806)  Acc@5: 100.0000 (99.4705)
Valid: 41 [ 350/390]  Loss: 0.5602 (0.383)  Acc@1: 82.8125 (87.3264)  Acc@5: 98.4375 (99.4703)
Valid: 41 [ 390/390]  Loss: 0.3535 (0.387)  Acc@1: 90.0000 (87.2720)  Acc@5: 100.0000 (99.4720)
valid_acc 87.272000
epoch = 41   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_5x5', 3)], normal_concat=range(2, 6), reduce=[('dil_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_3x3', 2), ('sep_conv_3x3', 1), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1504, 0.0619, 0.0563, 0.1082, 0.2494, 0.1349, 0.1228, 0.1161],
        [0.2421, 0.0485, 0.0443, 0.0749, 0.1601, 0.1420, 0.1404, 0.1478],
        [0.1712, 0.0616, 0.0608, 0.1044, 0.1955, 0.1922, 0.1211, 0.0932],
        [0.3669, 0.0478, 0.0485, 0.0772, 0.1078, 0.1046, 0.1379, 0.1093],
        [0.2856, 0.0380, 0.0418, 0.0795, 0.1446, 0.1516, 0.1221, 0.1367],
        [0.2739, 0.0612, 0.0565, 0.0963, 0.1443, 0.1248, 0.1175, 0.1255],
        [0.3720, 0.0464, 0.0437, 0.0702, 0.1348, 0.1148, 0.1028, 0.1153],
        [0.4381, 0.0315, 0.0325, 0.0611, 0.0906, 0.1320, 0.1070, 0.1072],
        [0.4893, 0.0277, 0.0290, 0.0443, 0.1064, 0.0926, 0.0962, 0.1145],
        [0.3608, 0.0605, 0.0588, 0.1005, 0.1034, 0.1020, 0.1108, 0.1031],
        [0.4310, 0.0437, 0.0440, 0.0673, 0.1241, 0.0874, 0.1135, 0.0891],
        [0.5450, 0.0299, 0.0312, 0.0586, 0.0952, 0.0746, 0.0830, 0.0824],
        [0.5108, 0.0283, 0.0300, 0.0458, 0.0908, 0.0883, 0.0938, 0.1122],
        [0.5942, 0.0255, 0.0274, 0.0371, 0.0775, 0.0773, 0.0792, 0.0817]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1085, 0.1326, 0.1175, 0.1252, 0.1414, 0.1227, 0.1308, 0.1212],
        [0.1266, 0.0980, 0.0839, 0.1344, 0.1502, 0.1390, 0.1527, 0.1152],
        [0.1027, 0.1419, 0.1325, 0.1300, 0.1206, 0.1203, 0.1150, 0.1370],
        [0.1326, 0.1041, 0.0972, 0.1242, 0.1299, 0.1672, 0.1414, 0.1033],
        [0.1350, 0.0754, 0.0763, 0.1346, 0.1370, 0.1196, 0.1454, 0.1766],
        [0.1116, 0.1197, 0.1268, 0.1417, 0.1353, 0.1171, 0.1140, 0.1338],
        [0.1262, 0.1118, 0.1106, 0.1316, 0.1371, 0.1386, 0.1315, 0.1125],
        [0.1344, 0.0739, 0.0808, 0.1403, 0.1472, 0.1425, 0.1391, 0.1417],
        [0.1458, 0.0626, 0.0793, 0.1255, 0.1250, 0.1421, 0.1819, 0.1377],
        [0.0991, 0.1267, 0.1204, 0.1511, 0.1292, 0.1364, 0.1363, 0.1006],
        [0.1118, 0.1089, 0.1047, 0.1462, 0.1610, 0.1226, 0.1208, 0.1241],
        [0.1330, 0.0771, 0.0819, 0.1557, 0.1453, 0.1333, 0.1381, 0.1357],
        [0.1488, 0.0639, 0.0764, 0.1319, 0.1316, 0.1546, 0.1606, 0.1322],
        [0.1690, 0.0674, 0.0739, 0.1190, 0.1495, 0.1573, 0.1168, 0.1472]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 42 [   0/390]  Loss: 0.3308 (0.331)  Acc@1: 89.0625 (89.0625)  Acc@5: 100.0000 (100.0000)LR: 2.484e-03
Train: 42 [  50/390]  Loss: 0.2057 (0.241)  Acc@1: 93.7500 (91.6973)  Acc@5: 100.0000 (99.9081)LR: 2.484e-03
Train: 42 [ 100/390]  Loss: 0.2165 (0.245)  Acc@1: 93.7500 (91.5377)  Acc@5: 100.0000 (99.8917)LR: 2.484e-03
Train: 42 [ 150/390]  Loss: 0.1949 (0.238)  Acc@1: 92.1875 (91.7839)  Acc@5: 100.0000 (99.8551)LR: 2.484e-03
Train: 42 [ 200/390]  Loss: 0.2925 (0.241)  Acc@1: 93.7500 (91.5967)  Acc@5: 100.0000 (99.8523)LR: 2.484e-03
Train: 42 [ 250/390]  Loss: 0.2391 (0.240)  Acc@1: 92.1875 (91.6770)  Acc@5: 100.0000 (99.8506)LR: 2.484e-03
Train: 42 [ 300/390]  Loss: 0.09861 (0.243)  Acc@1: 98.4375 (91.5023)  Acc@5: 100.0000 (99.8495)LR: 2.484e-03
Train: 42 [ 350/390]  Loss: 0.07411 (0.242)  Acc@1: 96.8750 (91.5465)  Acc@5: 100.0000 (99.8353)LR: 2.484e-03
Train: 42 [ 390/390]  Loss: 0.2919 (0.243)  Acc@1: 90.0000 (91.5400)  Acc@5: 100.0000 (99.8240)LR: 2.484e-03
train_acc 91.540000
Valid: 42 [   0/390]  Loss: 0.4749 (0.475)  Acc@1: 84.3750 (84.3750)  Acc@5: 98.4375 (98.4375)
Valid: 42 [  50/390]  Loss: 0.3744 (0.373)  Acc@1: 85.9375 (87.9289)  Acc@5: 100.0000 (99.6630)
Valid: 42 [ 100/390]  Loss: 0.4228 (0.370)  Acc@1: 89.0625 (88.0879)  Acc@5: 100.0000 (99.6751)
Valid: 42 [ 150/390]  Loss: 0.2370 (0.368)  Acc@1: 89.0625 (87.9139)  Acc@5: 100.0000 (99.5240)
Valid: 42 [ 200/390]  Loss: 0.4677 (0.370)  Acc@1: 84.3750 (87.9431)  Acc@5: 100.0000 (99.5025)
Valid: 42 [ 250/390]  Loss: 0.3416 (0.372)  Acc@1: 89.0625 (87.9358)  Acc@5: 100.0000 (99.5144)
Valid: 42 [ 300/390]  Loss: 0.4292 (0.378)  Acc@1: 84.3750 (87.7284)  Acc@5: 100.0000 (99.5120)
Valid: 42 [ 350/390]  Loss: 0.4744 (0.376)  Acc@1: 85.9375 (87.7537)  Acc@5: 98.4375 (99.5192)
Valid: 42 [ 390/390]  Loss: 0.2212 (0.375)  Acc@1: 90.0000 (87.7840)  Acc@5: 100.0000 (99.5320)
valid_acc 87.784000
epoch = 42   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('dil_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_3x3', 2), ('sep_conv_3x3', 1), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1502, 0.0608, 0.0554, 0.1074, 0.2518, 0.1360, 0.1231, 0.1153],
        [0.2481, 0.0473, 0.0435, 0.0737, 0.1601, 0.1402, 0.1405, 0.1465],
        [0.1727, 0.0607, 0.0600, 0.1040, 0.1955, 0.1929, 0.1214, 0.0927],
        [0.3786, 0.0466, 0.0475, 0.0761, 0.1061, 0.1019, 0.1367, 0.1063],
        [0.2924, 0.0372, 0.0411, 0.0787, 0.1446, 0.1502, 0.1215, 0.1344],
        [0.2804, 0.0605, 0.0559, 0.0961, 0.1427, 0.1236, 0.1159, 0.1250],
        [0.3844, 0.0454, 0.0429, 0.0695, 0.1332, 0.1117, 0.1000, 0.1128],
        [0.4494, 0.0308, 0.0320, 0.0605, 0.0875, 0.1299, 0.1041, 0.1058],
        [0.5021, 0.0270, 0.0283, 0.0434, 0.1038, 0.0898, 0.0933, 0.1123],
        [0.3704, 0.0596, 0.0582, 0.1005, 0.1007, 0.1007, 0.1091, 0.1008],
        [0.4441, 0.0424, 0.0432, 0.0664, 0.1206, 0.0855, 0.1110, 0.0868],
        [0.5595, 0.0291, 0.0305, 0.0575, 0.0916, 0.0721, 0.0802, 0.0795],
        [0.5273, 0.0276, 0.0294, 0.0451, 0.0874, 0.0848, 0.0911, 0.1073],
        [0.6092, 0.0248, 0.0268, 0.0364, 0.0743, 0.0745, 0.0756, 0.0784]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1085, 0.1328, 0.1180, 0.1260, 0.1422, 0.1213, 0.1307, 0.1204],
        [0.1265, 0.0972, 0.0834, 0.1333, 0.1521, 0.1385, 0.1538, 0.1152],
        [0.1020, 0.1407, 0.1320, 0.1293, 0.1212, 0.1207, 0.1147, 0.1392],
        [0.1323, 0.1030, 0.0965, 0.1236, 0.1306, 0.1677, 0.1429, 0.1033],
        [0.1352, 0.0745, 0.0761, 0.1349, 0.1382, 0.1200, 0.1451, 0.1760],
        [0.1120, 0.1194, 0.1270, 0.1428, 0.1354, 0.1166, 0.1134, 0.1333],
        [0.1254, 0.1113, 0.1104, 0.1330, 0.1375, 0.1380, 0.1316, 0.1127],
        [0.1352, 0.0733, 0.0811, 0.1418, 0.1466, 0.1414, 0.1402, 0.1404],
        [0.1470, 0.0622, 0.0791, 0.1258, 0.1246, 0.1413, 0.1821, 0.1378],
        [0.0988, 0.1268, 0.1206, 0.1528, 0.1296, 0.1361, 0.1355, 0.0998],
        [0.1114, 0.1082, 0.1043, 0.1452, 0.1625, 0.1226, 0.1215, 0.1242],
        [0.1325, 0.0763, 0.0816, 0.1557, 0.1460, 0.1338, 0.1386, 0.1356],
        [0.1488, 0.0638, 0.0765, 0.1330, 0.1311, 0.1554, 0.1606, 0.1308],
        [0.1694, 0.0669, 0.0739, 0.1193, 0.1497, 0.1578, 0.1164, 0.1467]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 43 [   0/390]  Loss: 0.1047 (0.105)  Acc@1: 95.3125 (95.3125)  Acc@5: 100.0000 (100.0000)LR: 2.142e-03
Train: 43 [  50/390]  Loss: 0.2210 (0.249)  Acc@1: 90.6250 (91.3603)  Acc@5: 100.0000 (99.7549)LR: 2.142e-03
Train: 43 [ 100/390]  Loss: 0.2380 (0.258)  Acc@1: 92.1875 (91.1665)  Acc@5: 100.0000 (99.7525)LR: 2.142e-03
Train: 43 [ 150/390]  Loss: 0.1490 (0.253)  Acc@1: 92.1875 (91.1113)  Acc@5: 100.0000 (99.7930)LR: 2.142e-03
Train: 43 [ 200/390]  Loss: 0.2007 (0.248)  Acc@1: 92.1875 (91.3402)  Acc@5: 100.0000 (99.8134)LR: 2.142e-03
Train: 43 [ 250/390]  Loss: 0.2435 (0.246)  Acc@1: 90.6250 (91.3596)  Acc@5: 100.0000 (99.8195)LR: 2.142e-03
Train: 43 [ 300/390]  Loss: 0.1831 (0.252)  Acc@1: 93.7500 (91.2064)  Acc@5: 100.0000 (99.8079)LR: 2.142e-03
Train: 43 [ 350/390]  Loss: 0.3223 (0.249)  Acc@1: 92.1875 (91.3595)  Acc@5: 98.4375 (99.7997)LR: 2.142e-03
Train: 43 [ 390/390]  Loss: 0.2297 (0.250)  Acc@1: 92.5000 (91.3480)  Acc@5: 100.0000 (99.7760)LR: 2.142e-03
train_acc 91.348000
Valid: 43 [   0/390]  Loss: 0.4416 (0.442)  Acc@1: 82.8125 (82.8125)  Acc@5: 100.0000 (100.0000)
Valid: 43 [  50/390]  Loss: 0.3052 (0.389)  Acc@1: 89.0625 (87.1017)  Acc@5: 100.0000 (99.6017)
Valid: 43 [ 100/390]  Loss: 0.2121 (0.371)  Acc@1: 89.0625 (87.7321)  Acc@5: 100.0000 (99.6132)
Valid: 43 [ 150/390]  Loss: 0.2089 (0.368)  Acc@1: 92.1875 (87.8001)  Acc@5: 100.0000 (99.5861)
Valid: 43 [ 200/390]  Loss: 0.6411 (0.369)  Acc@1: 78.1250 (87.7643)  Acc@5: 98.4375 (99.5414)
Valid: 43 [ 250/390]  Loss: 0.3259 (0.368)  Acc@1: 89.0625 (87.8548)  Acc@5: 100.0000 (99.5207)
Valid: 43 [ 300/390]  Loss: 0.3299 (0.370)  Acc@1: 89.0625 (87.7647)  Acc@5: 100.0000 (99.5017)
Valid: 43 [ 350/390]  Loss: 0.6850 (0.370)  Acc@1: 79.6875 (87.7715)  Acc@5: 98.4375 (99.4925)
Valid: 43 [ 390/390]  Loss: 0.5886 (0.368)  Acc@1: 77.5000 (87.8440)  Acc@5: 97.5000 (99.4680)
valid_acc 87.844000
epoch = 43   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_3x3', 2), ('sep_conv_3x3', 1), ('dil_conv_3x3', 3)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1499, 0.0593, 0.0547, 0.1069, 0.2547, 0.1367, 0.1230, 0.1148],
        [0.2529, 0.0459, 0.0427, 0.0724, 0.1598, 0.1397, 0.1400, 0.1467],
        [0.1743, 0.0597, 0.0595, 0.1040, 0.1966, 0.1921, 0.1215, 0.0923],
        [0.3899, 0.0453, 0.0467, 0.0751, 0.1053, 0.0997, 0.1344, 0.1035],
        [0.2994, 0.0363, 0.0407, 0.0782, 0.1445, 0.1480, 0.1207, 0.1322],
        [0.2878, 0.0591, 0.0553, 0.0961, 0.1415, 0.1223, 0.1140, 0.1238],
        [0.3972, 0.0441, 0.0423, 0.0688, 0.1304, 0.1086, 0.0977, 0.1109],
        [0.4598, 0.0301, 0.0316, 0.0597, 0.0851, 0.1282, 0.1017, 0.1039],
        [0.5157, 0.0263, 0.0277, 0.0424, 0.1005, 0.0873, 0.0908, 0.1093],
        [0.3797, 0.0582, 0.0576, 0.0998, 0.0991, 0.0992, 0.1075, 0.0990],
        [0.4550, 0.0413, 0.0424, 0.0653, 0.1185, 0.0831, 0.1099, 0.0845],
        [0.5725, 0.0282, 0.0300, 0.0560, 0.0889, 0.0699, 0.0775, 0.0770],
        [0.5389, 0.0269, 0.0290, 0.0443, 0.0854, 0.0826, 0.0896, 0.1033],
        [0.6242, 0.0241, 0.0262, 0.0356, 0.0714, 0.0716, 0.0717, 0.0751]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1089, 0.1316, 0.1181, 0.1262, 0.1428, 0.1210, 0.1316, 0.1196],
        [0.1255, 0.0961, 0.0827, 0.1331, 0.1550, 0.1380, 0.1540, 0.1155],
        [0.1027, 0.1406, 0.1324, 0.1291, 0.1206, 0.1214, 0.1140, 0.1391],
        [0.1322, 0.1023, 0.0962, 0.1235, 0.1305, 0.1693, 0.1433, 0.1027],
        [0.1351, 0.0735, 0.0761, 0.1352, 0.1392, 0.1203, 0.1447, 0.1760],
        [0.1132, 0.1183, 0.1265, 0.1435, 0.1350, 0.1172, 0.1124, 0.1340],
        [0.1249, 0.1108, 0.1100, 0.1341, 0.1382, 0.1379, 0.1322, 0.1119],
        [0.1347, 0.0727, 0.0808, 0.1415, 0.1466, 0.1412, 0.1411, 0.1413],
        [0.1464, 0.0615, 0.0783, 0.1244, 0.1261, 0.1419, 0.1831, 0.1383],
        [0.0997, 0.1257, 0.1201, 0.1534, 0.1293, 0.1367, 0.1355, 0.0997],
        [0.1109, 0.1082, 0.1052, 0.1438, 0.1628, 0.1226, 0.1224, 0.1243],
        [0.1311, 0.0754, 0.0818, 0.1562, 0.1468, 0.1336, 0.1395, 0.1357],
        [0.1489, 0.0633, 0.0763, 0.1332, 0.1308, 0.1549, 0.1623, 0.1303],
        [0.1689, 0.0664, 0.0743, 0.1200, 0.1514, 0.1572, 0.1143, 0.1476]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 44 [   0/390]  Loss: 0.2291 (0.229)  Acc@1: 95.3125 (95.3125)  Acc@5: 98.4375 (98.4375)LR: 1.843e-03
Train: 44 [  50/390]  Loss: 0.3156 (0.233)  Acc@1: 87.5000 (92.3100)  Acc@5: 98.4375 (99.7855)LR: 1.843e-03
Train: 44 [ 100/390]  Loss: 0.2701 (0.230)  Acc@1: 92.1875 (92.2184)  Acc@5: 100.0000 (99.8298)LR: 1.843e-03
Train: 44 [ 150/390]  Loss: 0.3204 (0.231)  Acc@1: 90.6250 (92.1358)  Acc@5: 100.0000 (99.8448)LR: 1.843e-03
Train: 44 [ 200/390]  Loss: 0.1040 (0.235)  Acc@1: 96.8750 (91.9932)  Acc@5: 100.0000 (99.8445)LR: 1.843e-03
Train: 44 [ 250/390]  Loss: 0.2010 (0.234)  Acc@1: 90.6250 (91.9323)  Acc@5: 100.0000 (99.8381)LR: 1.843e-03
Train: 44 [ 300/390]  Loss: 0.2756 (0.233)  Acc@1: 89.0625 (91.9279)  Acc@5: 100.0000 (99.8547)LR: 1.843e-03
Train: 44 [ 350/390]  Loss: 0.4295 (0.236)  Acc@1: 85.9375 (91.8714)  Acc@5: 100.0000 (99.8486)LR: 1.843e-03
Train: 44 [ 390/390]  Loss: 0.08672 (0.237)  Acc@1: 100.0000 (91.8480)  Acc@5: 100.0000 (99.8360)LR: 1.843e-03
train_acc 91.848000
Valid: 44 [   0/390]  Loss: 0.4265 (0.427)  Acc@1: 90.6250 (90.6250)  Acc@5: 100.0000 (100.0000)
Valid: 44 [  50/390]  Loss: 0.2842 (0.341)  Acc@1: 92.1875 (88.8480)  Acc@5: 100.0000 (99.5098)
Valid: 44 [ 100/390]  Loss: 0.5425 (0.352)  Acc@1: 87.5000 (88.2735)  Acc@5: 98.4375 (99.5823)
Valid: 44 [ 150/390]  Loss: 0.7386 (0.350)  Acc@1: 75.0000 (88.3485)  Acc@5: 96.8750 (99.5344)
Valid: 44 [ 200/390]  Loss: 0.4525 (0.351)  Acc@1: 82.8125 (88.4173)  Acc@5: 98.4375 (99.5258)
Valid: 44 [ 250/390]  Loss: 0.3376 (0.355)  Acc@1: 89.0625 (88.3840)  Acc@5: 98.4375 (99.5207)
Valid: 44 [ 300/390]  Loss: 0.1907 (0.354)  Acc@1: 93.7500 (88.3980)  Acc@5: 100.0000 (99.5328)
Valid: 44 [ 350/390]  Loss: 0.5642 (0.352)  Acc@1: 84.3750 (88.3903)  Acc@5: 100.0000 (99.5593)
Valid: 44 [ 390/390]  Loss: 0.1827 (0.354)  Acc@1: 97.5000 (88.2920)  Acc@5: 100.0000 (99.5320)
valid_acc 88.292000
epoch = 44   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_3x3', 2), ('dil_conv_3x3', 3), ('sep_conv_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1498, 0.0579, 0.0541, 0.1063, 0.2571, 0.1373, 0.1223, 0.1153],
        [0.2574, 0.0446, 0.0418, 0.0711, 0.1607, 0.1384, 0.1408, 0.1451],
        [0.1765, 0.0585, 0.0593, 0.1042, 0.1965, 0.1909, 0.1217, 0.0923],
        [0.3996, 0.0441, 0.0457, 0.0734, 0.1046, 0.0974, 0.1334, 0.1019],
        [0.3040, 0.0356, 0.0403, 0.0775, 0.1451, 0.1483, 0.1184, 0.1307],
        [0.2962, 0.0577, 0.0548, 0.0959, 0.1395, 0.1215, 0.1122, 0.1223],
        [0.4074, 0.0431, 0.0418, 0.0678, 0.1297, 0.1054, 0.0960, 0.1089],
        [0.4702, 0.0294, 0.0313, 0.0586, 0.0827, 0.1256, 0.1000, 0.1022],
        [0.5287, 0.0256, 0.0272, 0.0416, 0.0976, 0.0845, 0.0881, 0.1067],
        [0.3897, 0.0570, 0.0569, 0.0994, 0.0962, 0.0972, 0.1058, 0.0978],
        [0.4674, 0.0404, 0.0417, 0.0643, 0.1157, 0.0811, 0.1066, 0.0827],
        [0.5858, 0.0275, 0.0295, 0.0548, 0.0863, 0.0673, 0.0744, 0.0743],
        [0.5523, 0.0264, 0.0286, 0.0437, 0.0825, 0.0807, 0.0869, 0.0989],
        [0.6384, 0.0235, 0.0257, 0.0348, 0.0688, 0.0689, 0.0678, 0.0721]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1080, 0.1301, 0.1175, 0.1261, 0.1444, 0.1219, 0.1325, 0.1195],
        [0.1262, 0.0949, 0.0819, 0.1318, 0.1574, 0.1382, 0.1540, 0.1155],
        [0.1031, 0.1403, 0.1325, 0.1292, 0.1194, 0.1216, 0.1141, 0.1398],
        [0.1321, 0.1016, 0.0962, 0.1229, 0.1308, 0.1697, 0.1445, 0.1023],
        [0.1350, 0.0726, 0.0763, 0.1356, 0.1391, 0.1216, 0.1447, 0.1752],
        [0.1134, 0.1172, 0.1262, 0.1449, 0.1347, 0.1179, 0.1121, 0.1335],
        [0.1251, 0.1102, 0.1097, 0.1346, 0.1374, 0.1386, 0.1321, 0.1123],
        [0.1345, 0.0717, 0.0809, 0.1416, 0.1471, 0.1411, 0.1412, 0.1419],
        [0.1466, 0.0607, 0.0779, 0.1241, 0.1257, 0.1423, 0.1838, 0.1389],
        [0.0999, 0.1247, 0.1198, 0.1527, 0.1293, 0.1373, 0.1360, 0.1003],
        [0.1117, 0.1072, 0.1047, 0.1437, 0.1617, 0.1232, 0.1226, 0.1252],
        [0.1289, 0.0745, 0.0820, 0.1561, 0.1486, 0.1347, 0.1397, 0.1355],
        [0.1493, 0.0627, 0.0762, 0.1335, 0.1310, 0.1556, 0.1617, 0.1300],
        [0.1675, 0.0655, 0.0742, 0.1198, 0.1539, 0.1576, 0.1137, 0.1478]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 45 [   0/390]  Loss: 0.3865 (0.387)  Acc@1: 87.5000 (87.5000)  Acc@5: 100.0000 (100.0000)LR: 1.587e-03
Train: 45 [  50/390]  Loss: 0.2185 (0.250)  Acc@1: 93.7500 (91.1765)  Acc@5: 100.0000 (99.6017)LR: 1.587e-03
Train: 45 [ 100/390]  Loss: 0.1940 (0.248)  Acc@1: 92.1875 (91.1665)  Acc@5: 100.0000 (99.7215)LR: 1.587e-03
Train: 45 [ 150/390]  Loss: 0.1546 (0.252)  Acc@1: 95.3125 (90.9251)  Acc@5: 100.0000 (99.7827)LR: 1.587e-03
Train: 45 [ 200/390]  Loss: 0.3987 (0.252)  Acc@1: 87.5000 (90.9437)  Acc@5: 100.0000 (99.7901)LR: 1.587e-03
Train: 45 [ 250/390]  Loss: 0.1446 (0.249)  Acc@1: 95.3125 (91.1168)  Acc@5: 100.0000 (99.8070)LR: 1.587e-03
Train: 45 [ 300/390]  Loss: 0.1828 (0.247)  Acc@1: 92.1875 (91.2220)  Acc@5: 100.0000 (99.8131)LR: 1.587e-03
Train: 45 [ 350/390]  Loss: 0.2589 (0.249)  Acc@1: 90.6250 (91.1147)  Acc@5: 100.0000 (99.8130)LR: 1.587e-03
Train: 45 [ 390/390]  Loss: 0.4919 (0.249)  Acc@1: 87.5000 (91.1680)  Acc@5: 100.0000 (99.8120)LR: 1.587e-03
train_acc 91.168000
Valid: 45 [   0/390]  Loss: 0.4670 (0.467)  Acc@1: 84.3750 (84.3750)  Acc@5: 100.0000 (100.0000)
Valid: 45 [  50/390]  Loss: 0.2053 (0.408)  Acc@1: 90.6250 (86.8873)  Acc@5: 100.0000 (99.5711)
Valid: 45 [ 100/390]  Loss: 0.2223 (0.401)  Acc@1: 92.1875 (87.0668)  Acc@5: 100.0000 (99.4740)
Valid: 45 [ 150/390]  Loss: 0.3250 (0.399)  Acc@1: 90.6250 (87.0861)  Acc@5: 100.0000 (99.4619)
Valid: 45 [ 200/390]  Loss: 0.3440 (0.397)  Acc@1: 89.0625 (87.2046)  Acc@5: 100.0000 (99.4481)
Valid: 45 [ 250/390]  Loss: 0.2681 (0.387)  Acc@1: 89.0625 (87.4938)  Acc@5: 100.0000 (99.4335)
Valid: 45 [ 300/390]  Loss: 0.3107 (0.385)  Acc@1: 89.0625 (87.4585)  Acc@5: 100.0000 (99.4601)
Valid: 45 [ 350/390]  Loss: 0.4635 (0.386)  Acc@1: 89.0625 (87.4688)  Acc@5: 100.0000 (99.4658)
Valid: 45 [ 390/390]  Loss: 0.3488 (0.386)  Acc@1: 92.5000 (87.5200)  Acc@5: 97.5000 (99.4680)
valid_acc 87.520000
epoch = 45   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_3x3', 2), ('dil_conv_3x3', 3), ('sep_conv_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1499, 0.0568, 0.0532, 0.1051, 0.2584, 0.1383, 0.1229, 0.1155],
        [0.2623, 0.0437, 0.0410, 0.0696, 0.1607, 0.1383, 0.1403, 0.1440],
        [0.1791, 0.0577, 0.0588, 0.1042, 0.1965, 0.1906, 0.1217, 0.0914],
        [0.4106, 0.0430, 0.0446, 0.0719, 0.1025, 0.0951, 0.1320, 0.1003],
        [0.3097, 0.0351, 0.0398, 0.0769, 0.1452, 0.1472, 0.1169, 0.1292],
        [0.3041, 0.0565, 0.0540, 0.0952, 0.1384, 0.1214, 0.1107, 0.1198],
        [0.4178, 0.0421, 0.0408, 0.0661, 0.1292, 0.1032, 0.0936, 0.1071],
        [0.4786, 0.0289, 0.0309, 0.0576, 0.0807, 0.1231, 0.0985, 0.1018],
        [0.5389, 0.0251, 0.0268, 0.0409, 0.0956, 0.0823, 0.0857, 0.1049],
        [0.4012, 0.0555, 0.0558, 0.0983, 0.0941, 0.0960, 0.1036, 0.0955],
        [0.4821, 0.0392, 0.0405, 0.0624, 0.1120, 0.0790, 0.1044, 0.0804],
        [0.5981, 0.0268, 0.0287, 0.0533, 0.0835, 0.0656, 0.0716, 0.0723],
        [0.5664, 0.0257, 0.0280, 0.0427, 0.0800, 0.0780, 0.0837, 0.0956],
        [0.6523, 0.0228, 0.0250, 0.0339, 0.0659, 0.0663, 0.0644, 0.0695]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1084, 0.1293, 0.1172, 0.1252, 0.1454, 0.1219, 0.1336, 0.1191],
        [0.1255, 0.0936, 0.0811, 0.1312, 0.1589, 0.1391, 0.1549, 0.1159],
        [0.1038, 0.1398, 0.1324, 0.1296, 0.1191, 0.1214, 0.1141, 0.1398],
        [0.1320, 0.1001, 0.0955, 0.1235, 0.1320, 0.1711, 0.1441, 0.1017],
        [0.1340, 0.0718, 0.0762, 0.1357, 0.1407, 0.1217, 0.1444, 0.1755],
        [0.1148, 0.1166, 0.1261, 0.1455, 0.1343, 0.1178, 0.1113, 0.1336],
        [0.1247, 0.1089, 0.1091, 0.1348, 0.1385, 0.1392, 0.1319, 0.1129],
        [0.1341, 0.0708, 0.0803, 0.1408, 0.1474, 0.1430, 0.1410, 0.1427],
        [0.1473, 0.0602, 0.0773, 0.1235, 0.1264, 0.1423, 0.1841, 0.1389],
        [0.1005, 0.1245, 0.1197, 0.1531, 0.1301, 0.1370, 0.1346, 0.1005],
        [0.1122, 0.1066, 0.1047, 0.1442, 0.1608, 0.1243, 0.1227, 0.1245],
        [0.1276, 0.0738, 0.0815, 0.1554, 0.1491, 0.1354, 0.1412, 0.1360],
        [0.1482, 0.0626, 0.0759, 0.1328, 0.1306, 0.1575, 0.1627, 0.1298],
        [0.1662, 0.0657, 0.0741, 0.1200, 0.1554, 0.1571, 0.1132, 0.1482]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 46 [   0/390]  Loss: 0.2512 (0.251)  Acc@1: 89.0625 (89.0625)  Acc@5: 100.0000 (100.0000)LR: 1.377e-03
Train: 46 [  50/390]  Loss: 0.2455 (0.241)  Acc@1: 93.7500 (91.2990)  Acc@5: 100.0000 (99.9694)LR: 1.377e-03
Train: 46 [ 100/390]  Loss: 0.2798 (0.252)  Acc@1: 93.7500 (90.8571)  Acc@5: 100.0000 (99.9072)LR: 1.377e-03
Train: 46 [ 150/390]  Loss: 0.4580 (0.248)  Acc@1: 87.5000 (90.9044)  Acc@5: 100.0000 (99.8965)LR: 1.377e-03
Train: 46 [ 200/390]  Loss: 0.2754 (0.249)  Acc@1: 89.0625 (91.0525)  Acc@5: 100.0000 (99.8912)LR: 1.377e-03
Train: 46 [ 250/390]  Loss: 0.2853 (0.245)  Acc@1: 90.6250 (91.2973)  Acc@5: 98.4375 (99.8879)LR: 1.377e-03
Train: 46 [ 300/390]  Loss: 0.1946 (0.245)  Acc@1: 95.3125 (91.3310)  Acc@5: 100.0000 (99.8910)LR: 1.377e-03
Train: 46 [ 350/390]  Loss: 0.1873 (0.241)  Acc@1: 93.7500 (91.4441)  Acc@5: 100.0000 (99.8798)LR: 1.377e-03
Train: 46 [ 390/390]  Loss: 0.2204 (0.243)  Acc@1: 95.0000 (91.3760)  Acc@5: 100.0000 (99.8760)LR: 1.377e-03
train_acc 91.376000
Valid: 46 [   0/390]  Loss: 0.5054 (0.505)  Acc@1: 84.3750 (84.3750)  Acc@5: 100.0000 (100.0000)
Valid: 46 [  50/390]  Loss: 0.2003 (0.375)  Acc@1: 90.6250 (87.3162)  Acc@5: 100.0000 (99.4179)
Valid: 46 [ 100/390]  Loss: 0.4511 (0.368)  Acc@1: 85.9375 (87.7011)  Acc@5: 98.4375 (99.4740)
Valid: 46 [ 150/390]  Loss: 0.2705 (0.366)  Acc@1: 89.0625 (87.7380)  Acc@5: 100.0000 (99.4723)
Valid: 46 [ 250/390]  Loss: 0.2328 (0.365)  Acc@1: 92.1875 (87.7179)  Acc@5: 98.4375 (99.4335)
Valid: 46 [ 350/390]  Loss: 0.6129 (0.371)  Acc@1: 82.8125 (87.5223)  Acc@5: 96.8750 (99.4658)
Valid: 46 [ 200/390]  Loss: 0.3494 (0.362)  Acc@1: 84.3750 (87.8032)  Acc@5: 98.4375 (99.4714)
Valid: 46 [ 300/390]  Loss: 0.2960 (0.369)  Acc@1: 85.9375 (87.5986)  Acc@5: 100.0000 (99.4446)
Valid: 46 [ 390/390]  Loss: 0.3203 (0.371)  Acc@1: 85.0000 (87.4680)  Acc@5: 100.0000 (99.4760)
valid_acc 87.468000
epoch = 46   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_3x3', 2), ('dil_conv_3x3', 3), ('sep_conv_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1517, 0.0557, 0.0528, 0.1047, 0.2584, 0.1387, 0.1226, 0.1155],
        [0.2646, 0.0429, 0.0405, 0.0689, 0.1614, 0.1380, 0.1398, 0.1441],
        [0.1830, 0.0566, 0.0583, 0.1041, 0.1947, 0.1899, 0.1223, 0.0912],
        [0.4192, 0.0419, 0.0436, 0.0702, 0.1014, 0.0935, 0.1307, 0.0994],
        [0.3154, 0.0342, 0.0392, 0.0759, 0.1454, 0.1465, 0.1161, 0.1274],
        [0.3127, 0.0555, 0.0533, 0.0945, 0.1365, 0.1211, 0.1082, 0.1182],
        [0.4257, 0.0414, 0.0402, 0.0652, 0.1283, 0.1010, 0.0921, 0.1061],
        [0.4877, 0.0285, 0.0306, 0.0569, 0.0784, 0.1208, 0.0964, 0.1007],
        [0.5490, 0.0247, 0.0264, 0.0401, 0.0931, 0.0800, 0.0839, 0.1029],
        [0.4129, 0.0542, 0.0549, 0.0972, 0.0920, 0.0943, 0.1015, 0.0931],
        [0.4926, 0.0385, 0.0399, 0.0615, 0.1097, 0.0772, 0.1020, 0.0786],
        [0.6103, 0.0261, 0.0282, 0.0521, 0.0805, 0.0635, 0.0692, 0.0702],
        [0.5766, 0.0253, 0.0277, 0.0420, 0.0776, 0.0757, 0.0822, 0.0928],
        [0.6639, 0.0223, 0.0245, 0.0333, 0.0637, 0.0639, 0.0613, 0.0670]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1095, 0.1266, 0.1155, 0.1245, 0.1470, 0.1229, 0.1348, 0.1192],
        [0.1240, 0.0929, 0.0805, 0.1312, 0.1596, 0.1392, 0.1566, 0.1162],
        [0.1056, 0.1379, 0.1312, 0.1298, 0.1187, 0.1212, 0.1143, 0.1413],
        [0.1318, 0.0997, 0.0953, 0.1243, 0.1323, 0.1712, 0.1439, 0.1014],
        [0.1327, 0.0714, 0.0763, 0.1356, 0.1412, 0.1221, 0.1453, 0.1755],
        [0.1160, 0.1143, 0.1244, 0.1465, 0.1351, 0.1185, 0.1106, 0.1345],
        [0.1244, 0.1085, 0.1089, 0.1351, 0.1383, 0.1400, 0.1317, 0.1130],
        [0.1341, 0.0700, 0.0799, 0.1400, 0.1494, 0.1437, 0.1409, 0.1421],
        [0.1460, 0.0591, 0.0760, 0.1208, 0.1270, 0.1438, 0.1873, 0.1399],
        [0.1018, 0.1226, 0.1183, 0.1524, 0.1307, 0.1385, 0.1358, 0.0999],
        [0.1116, 0.1065, 0.1047, 0.1453, 0.1615, 0.1246, 0.1220, 0.1238],
        [0.1270, 0.0738, 0.0816, 0.1556, 0.1492, 0.1358, 0.1415, 0.1355],
        [0.1480, 0.0621, 0.0753, 0.1316, 0.1319, 0.1582, 0.1628, 0.1300],
        [0.1643, 0.0654, 0.0739, 0.1196, 0.1573, 0.1580, 0.1130, 0.1485]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 47 [   0/390]  Loss: 0.2161 (0.216)  Acc@1: 93.7500 (93.7500)  Acc@5: 98.4375 (98.4375)LR: 1.213e-03
Train: 47 [  50/390]  Loss: 0.2309 (0.212)  Acc@1: 90.6250 (92.5551)  Acc@5: 100.0000 (99.9694)LR: 1.213e-03
Train: 47 [ 100/390]  Loss: 0.1749 (0.226)  Acc@1: 93.7500 (92.1566)  Acc@5: 100.0000 (99.9226)LR: 1.213e-03
Train: 47 [ 150/390]  Loss: 0.2860 (0.238)  Acc@1: 90.6250 (91.6598)  Acc@5: 98.4375 (99.8758)LR: 1.213e-03
Train: 47 [ 200/390]  Loss: 0.2154 (0.243)  Acc@1: 92.1875 (91.6278)  Acc@5: 98.4375 (99.7979)LR: 1.213e-03
Train: 47 [ 250/390]  Loss: 0.2334 (0.244)  Acc@1: 90.6250 (91.5463)  Acc@5: 100.0000 (99.8132)LR: 1.213e-03
Train: 47 [ 300/390]  Loss: 0.1748 (0.247)  Acc@1: 93.7500 (91.4088)  Acc@5: 100.0000 (99.7975)LR: 1.213e-03
Train: 47 [ 350/390]  Loss: 0.2844 (0.248)  Acc@1: 90.6250 (91.3328)  Acc@5: 100.0000 (99.8086)LR: 1.213e-03
Train: 47 [ 390/390]  Loss: 0.2881 (0.248)  Acc@1: 87.5000 (91.3400)  Acc@5: 100.0000 (99.8200)LR: 1.213e-03
train_acc 91.340000
Valid: 47 [   0/390]  Loss: 0.4580 (0.458)  Acc@1: 81.2500 (81.2500)  Acc@5: 100.0000 (100.0000)
Valid: 47 [  50/390]  Loss: 0.5822 (0.422)  Acc@1: 81.2500 (85.8762)  Acc@5: 98.4375 (99.2341)
Valid: 47 [ 100/390]  Loss: 0.4753 (0.409)  Acc@1: 81.2500 (86.2005)  Acc@5: 98.4375 (99.3502)
Valid: 47 [ 150/390]  Loss: 0.5627 (0.400)  Acc@1: 85.9375 (86.4963)  Acc@5: 98.4375 (99.3791)
Valid: 47 [ 200/390]  Loss: 0.4073 (0.407)  Acc@1: 89.0625 (86.2795)  Acc@5: 98.4375 (99.4248)
Valid: 47 [ 250/390]  Loss: 0.5462 (0.406)  Acc@1: 87.5000 (86.3297)  Acc@5: 100.0000 (99.4646)
Valid: 47 [ 300/390]  Loss: 0.3330 (0.404)  Acc@1: 89.0625 (86.3839)  Acc@5: 100.0000 (99.4549)
Valid: 47 [ 350/390]  Loss: 0.2678 (0.407)  Acc@1: 87.5000 (86.3248)  Acc@5: 100.0000 (99.4614)
Valid: 47 [ 390/390]  Loss: 0.5204 (0.407)  Acc@1: 80.0000 (86.3720)  Acc@5: 100.0000 (99.4360)
valid_acc 86.372000
epoch = 47   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_3x3', 2), ('dil_conv_3x3', 3), ('sep_conv_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1529, 0.0548, 0.0525, 0.1049, 0.2584, 0.1392, 0.1218, 0.1155],
        [0.2675, 0.0421, 0.0403, 0.0683, 0.1613, 0.1361, 0.1402, 0.1443],
        [0.1850, 0.0556, 0.0580, 0.1039, 0.1958, 0.1890, 0.1213, 0.0913],
        [0.4297, 0.0410, 0.0432, 0.0693, 0.0997, 0.0917, 0.1279, 0.0975],
        [0.3213, 0.0337, 0.0390, 0.0753, 0.1463, 0.1450, 0.1147, 0.1249],
        [0.3207, 0.0542, 0.0527, 0.0938, 0.1363, 0.1195, 0.1059, 0.1168],
        [0.4361, 0.0402, 0.0398, 0.0643, 0.1259, 0.0987, 0.0905, 0.1045],
        [0.4983, 0.0280, 0.0303, 0.0561, 0.0766, 0.1179, 0.0941, 0.0987],
        [0.5591, 0.0242, 0.0260, 0.0395, 0.0910, 0.0779, 0.0814, 0.1009],
        [0.4225, 0.0530, 0.0542, 0.0963, 0.0899, 0.0928, 0.0998, 0.0914],
        [0.4993, 0.0380, 0.0397, 0.0612, 0.1076, 0.0762, 0.1006, 0.0773],
        [0.6189, 0.0257, 0.0280, 0.0512, 0.0787, 0.0622, 0.0672, 0.0681],
        [0.5844, 0.0249, 0.0274, 0.0415, 0.0762, 0.0744, 0.0809, 0.0902],
        [0.6734, 0.0220, 0.0243, 0.0330, 0.0615, 0.0621, 0.0587, 0.0650]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1097, 0.1253, 0.1150, 0.1243, 0.1488, 0.1223, 0.1360, 0.1187],
        [0.1238, 0.0916, 0.0796, 0.1305, 0.1602, 0.1391, 0.1578, 0.1175],
        [0.1058, 0.1371, 0.1309, 0.1299, 0.1176, 0.1212, 0.1152, 0.1423],
        [0.1324, 0.0988, 0.0947, 0.1245, 0.1316, 0.1721, 0.1446, 0.1012],
        [0.1315, 0.0708, 0.0764, 0.1350, 0.1427, 0.1220, 0.1449, 0.1768],
        [0.1166, 0.1135, 0.1238, 0.1484, 0.1347, 0.1197, 0.1092, 0.1341],
        [0.1251, 0.1082, 0.1089, 0.1354, 0.1376, 0.1402, 0.1311, 0.1135],
        [0.1342, 0.0693, 0.0798, 0.1394, 0.1502, 0.1439, 0.1412, 0.1420],
        [0.1444, 0.0583, 0.0754, 0.1198, 0.1279, 0.1453, 0.1886, 0.1402],
        [0.1022, 0.1215, 0.1175, 0.1544, 0.1301, 0.1382, 0.1365, 0.0997],
        [0.1117, 0.1062, 0.1052, 0.1459, 0.1607, 0.1253, 0.1221, 0.1230],
        [0.1260, 0.0734, 0.0817, 0.1550, 0.1500, 0.1360, 0.1433, 0.1346],
        [0.1475, 0.0616, 0.0753, 0.1313, 0.1317, 0.1593, 0.1630, 0.1303],
        [0.1632, 0.0650, 0.0741, 0.1195, 0.1582, 0.1584, 0.1127, 0.1489]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 48 [   0/390]  Loss: 0.2466 (0.247)  Acc@1: 92.1875 (92.1875)  Acc@5: 100.0000 (100.0000)LR: 1.095e-03
Train: 48 [  50/390]  Loss: 0.3041 (0.247)  Acc@1: 85.9375 (91.6667)  Acc@5: 100.0000 (99.7549)LR: 1.095e-03
Train: 48 [ 100/390]  Loss: 0.1465 (0.255)  Acc@1: 95.3125 (91.3057)  Acc@5: 100.0000 (99.8453)LR: 1.095e-03
Train: 48 [ 150/390]  Loss: 0.3076 (0.250)  Acc@1: 90.6250 (91.4942)  Acc@5: 100.0000 (99.8551)LR: 1.095e-03
Train: 48 [ 200/390]  Loss: 0.2924 (0.250)  Acc@1: 90.6250 (91.3402)  Acc@5: 98.4375 (99.8601)LR: 1.095e-03
Train: 48 [ 250/390]  Loss: 0.09200 (0.252)  Acc@1: 98.4375 (91.3222)  Acc@5: 100.0000 (99.8381)LR: 1.095e-03
Train: 48 [ 300/390]  Loss: 0.3077 (0.254)  Acc@1: 93.7500 (91.2739)  Acc@5: 100.0000 (99.8287)LR: 1.095e-03
Train: 48 [ 350/390]  Loss: 0.3702 (0.252)  Acc@1: 87.5000 (91.3373)  Acc@5: 98.4375 (99.8308)LR: 1.095e-03
Train: 48 [ 390/390]  Loss: 0.3229 (0.253)  Acc@1: 87.5000 (91.3480)  Acc@5: 100.0000 (99.8320)LR: 1.095e-03
train_acc 91.348000
Valid: 48 [   0/390]  Loss: 0.3962 (0.396)  Acc@1: 82.8125 (82.8125)  Acc@5: 100.0000 (100.0000)
Valid: 48 [  50/390]  Loss: 0.3912 (0.423)  Acc@1: 84.3750 (85.4473)  Acc@5: 96.8750 (99.3873)
Valid: 48 [ 100/390]  Loss: 0.3851 (0.409)  Acc@1: 82.8125 (86.0303)  Acc@5: 100.0000 (99.3812)
Valid: 48 [ 150/390]  Loss: 0.3822 (0.400)  Acc@1: 87.5000 (86.3204)  Acc@5: 100.0000 (99.3584)
Valid: 48 [ 200/390]  Loss: 0.1463 (0.394)  Acc@1: 93.7500 (86.5594)  Acc@5: 100.0000 (99.3548)
Valid: 48 [ 250/390]  Loss: 0.4243 (0.399)  Acc@1: 85.9375 (86.5413)  Acc@5: 98.4375 (99.3401)
Valid: 48 [ 300/390]  Loss: 0.4243 (0.399)  Acc@1: 85.9375 (86.5864)  Acc@5: 100.0000 (99.3875)
Valid: 48 [ 350/390]  Loss: 0.3555 (0.397)  Acc@1: 85.9375 (86.7343)  Acc@5: 100.0000 (99.3857)
Valid: 48 [ 390/390]  Loss: 0.6583 (0.399)  Acc@1: 82.5000 (86.5720)  Acc@5: 100.0000 (99.4080)
valid_acc 86.572000
epoch = 48   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('sep_conv_3x3', 2), ('dil_conv_3x3', 3), ('sep_conv_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1532, 0.0536, 0.0516, 0.1039, 0.2613, 0.1395, 0.1215, 0.1153],
        [0.2718, 0.0411, 0.0398, 0.0675, 0.1619, 0.1350, 0.1398, 0.1431],
        [0.1871, 0.0549, 0.0575, 0.1038, 0.1966, 0.1887, 0.1202, 0.0913],
        [0.4416, 0.0400, 0.0424, 0.0679, 0.0981, 0.0893, 0.1259, 0.0949],
        [0.3270, 0.0332, 0.0384, 0.0745, 0.1467, 0.1432, 0.1134, 0.1237],
        [0.3283, 0.0533, 0.0521, 0.0936, 0.1354, 0.1184, 0.1034, 0.1155],
        [0.4472, 0.0393, 0.0391, 0.0633, 0.1247, 0.0958, 0.0886, 0.1021],
        [0.5058, 0.0277, 0.0300, 0.0554, 0.0751, 0.1158, 0.0928, 0.0973],
        [0.5679, 0.0239, 0.0258, 0.0391, 0.0893, 0.0762, 0.0785, 0.0993],
        [0.4333, 0.0519, 0.0531, 0.0955, 0.0881, 0.0910, 0.0973, 0.0897],
        [0.5111, 0.0371, 0.0389, 0.0601, 0.1055, 0.0740, 0.0983, 0.0750],
        [0.6271, 0.0254, 0.0277, 0.0503, 0.0770, 0.0609, 0.0653, 0.0663],
        [0.5927, 0.0246, 0.0270, 0.0409, 0.0750, 0.0729, 0.0793, 0.0875],
        [0.6829, 0.0216, 0.0238, 0.0326, 0.0595, 0.0608, 0.0562, 0.0627]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1100, 0.1246, 0.1144, 0.1244, 0.1492, 0.1225, 0.1369, 0.1181],
        [0.1233, 0.0905, 0.0784, 0.1300, 0.1623, 0.1401, 0.1580, 0.1174],
        [0.1065, 0.1369, 0.1305, 0.1301, 0.1170, 0.1215, 0.1149, 0.1426],
        [0.1327, 0.0981, 0.0937, 0.1246, 0.1309, 0.1738, 0.1445, 0.1018],
        [0.1307, 0.0707, 0.0762, 0.1347, 0.1429, 0.1216, 0.1443, 0.1788],
        [0.1174, 0.1128, 0.1230, 0.1489, 0.1343, 0.1197, 0.1092, 0.1346],
        [0.1256, 0.1079, 0.1081, 0.1356, 0.1371, 0.1415, 0.1311, 0.1131],
        [0.1331, 0.0690, 0.0793, 0.1385, 0.1513, 0.1444, 0.1422, 0.1422],
        [0.1441, 0.0580, 0.0746, 0.1184, 0.1290, 0.1456, 0.1901, 0.1401],
        [0.1026, 0.1210, 0.1166, 0.1552, 0.1301, 0.1376, 0.1368, 0.1000],
        [0.1116, 0.1061, 0.1044, 0.1468, 0.1609, 0.1258, 0.1215, 0.1227],
        [0.1253, 0.0734, 0.0815, 0.1546, 0.1499, 0.1366, 0.1427, 0.1361],
        [0.1466, 0.0617, 0.0751, 0.1308, 0.1320, 0.1598, 0.1633, 0.1306],
        [0.1624, 0.0652, 0.0740, 0.1195, 0.1593, 0.1598, 0.1116, 0.1482]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Train: 49 [   0/390]  Loss: 0.2054 (0.205)  Acc@1: 90.6250 (90.6250)  Acc@5: 100.0000 (100.0000)LR: 1.024e-03
Train: 49 [  50/390]  Loss: 0.2924 (0.245)  Acc@1: 87.5000 (91.8505)  Acc@5: 100.0000 (99.8775)LR: 1.024e-03
Train: 49 [ 100/390]  Loss: 0.2030 (0.258)  Acc@1: 93.7500 (91.3366)  Acc@5: 100.0000 (99.7834)LR: 1.024e-03
Train: 49 [ 150/390]  Loss: 0.3117 (0.251)  Acc@1: 89.0625 (91.4942)  Acc@5: 100.0000 (99.8551)LR: 1.024e-03
Train: 49 [ 200/390]  Loss: 0.4829 (0.252)  Acc@1: 82.8125 (91.3635)  Acc@5: 98.4375 (99.8368)LR: 1.024e-03
Train: 49 [ 250/390]  Loss: 0.2537 (0.250)  Acc@1: 89.0625 (91.4156)  Acc@5: 100.0000 (99.8319)LR: 1.024e-03
Train: 49 [ 300/390]  Loss: 0.3199 (0.250)  Acc@1: 87.5000 (91.4452)  Acc@5: 98.4375 (99.8287)LR: 1.024e-03
Train: 49 [ 350/390]  Loss: 0.1388 (0.249)  Acc@1: 93.7500 (91.4396)  Acc@5: 100.0000 (99.8219)LR: 1.024e-03
Train: 49 [ 390/390]  Loss: 0.3457 (0.249)  Acc@1: 92.5000 (91.4120)  Acc@5: 100.0000 (99.8240)LR: 1.024e-03
train_acc 91.412000
Valid: 49 [   0/390]  Loss: 0.4779 (0.478)  Acc@1: 79.6875 (79.6875)  Acc@5: 98.4375 (98.4375)
Valid: 49 [  50/390]  Loss: 0.2228 (0.363)  Acc@1: 93.7500 (87.4694)  Acc@5: 100.0000 (99.4792)
Valid: 49 [ 100/390]  Loss: 0.2599 (0.361)  Acc@1: 90.6250 (87.7939)  Acc@5: 100.0000 (99.5204)
Valid: 49 [ 150/390]  Loss: 0.4778 (0.371)  Acc@1: 81.2500 (87.4793)  Acc@5: 100.0000 (99.5137)
Valid: 49 [ 200/390]  Loss: 0.3210 (0.364)  Acc@1: 85.9375 (87.6244)  Acc@5: 100.0000 (99.5103)
Valid: 49 [ 250/390]  Loss: 0.3306 (0.358)  Acc@1: 92.1875 (87.7926)  Acc@5: 100.0000 (99.5393)
Valid: 49 [ 300/390]  Loss: 0.5245 (0.363)  Acc@1: 82.8125 (87.5779)  Acc@5: 98.4375 (99.5432)
Valid: 49 [ 350/390]  Loss: 0.1707 (0.363)  Acc@1: 90.6250 (87.6024)  Acc@5: 100.0000 (99.5192)
Valid: 49 [ 390/390]  Loss: 0.2860 (0.366)  Acc@1: 90.0000 (87.5960)  Acc@5: 100.0000 (99.4880)
valid_acc 87.596000
epoch = 49   
 genotype = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 2), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('dil_conv_3x3', 0)], normal_concat=range(2, 6), reduce=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('sep_conv_5x5', 1), ('dil_conv_3x3', 3), ('skip_connect', 0), ('dil_conv_3x3', 3), ('sep_conv_3x3', 1)], reduce_concat=range(2, 6))
alphas_normal = 
 tensor([[0.1554, 0.0527, 0.0513, 0.1032, 0.2603, 0.1402, 0.1208, 0.1163],
        [0.2738, 0.0405, 0.0396, 0.0668, 0.1614, 0.1341, 0.1399, 0.1439],
        [0.1890, 0.0540, 0.0571, 0.1031, 0.1963, 0.1887, 0.1207, 0.0911],
        [0.4507, 0.0392, 0.0419, 0.0668, 0.0958, 0.0888, 0.1239, 0.0929],
        [0.3326, 0.0327, 0.0382, 0.0737, 0.1452, 0.1420, 0.1129, 0.1227],
        [0.3357, 0.0522, 0.0516, 0.0926, 0.1334, 0.1174, 0.1021, 0.1150],
        [0.4555, 0.0387, 0.0390, 0.0626, 0.1235, 0.0945, 0.0864, 0.0998],
        [0.5137, 0.0273, 0.0298, 0.0547, 0.0734, 0.1134, 0.0911, 0.0965],
        [0.5778, 0.0234, 0.0254, 0.0384, 0.0872, 0.0742, 0.0765, 0.0971],
        [0.4417, 0.0509, 0.0524, 0.0943, 0.0864, 0.0894, 0.0963, 0.0886],
        [0.5199, 0.0365, 0.0386, 0.0594, 0.1033, 0.0724, 0.0960, 0.0739],
        [0.6373, 0.0249, 0.0274, 0.0492, 0.0747, 0.0593, 0.0631, 0.0642],
        [0.6029, 0.0242, 0.0267, 0.0403, 0.0727, 0.0717, 0.0768, 0.0848],
        [0.6910, 0.0213, 0.0236, 0.0322, 0.0575, 0.0594, 0.0541, 0.0609]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
 alphas_reduct = 
 tensor([[0.1102, 0.1243, 0.1145, 0.1243, 0.1494, 0.1225, 0.1368, 0.1180],
        [0.1232, 0.0894, 0.0776, 0.1295, 0.1645, 0.1402, 0.1580, 0.1176],
        [0.1066, 0.1356, 0.1294, 0.1314, 0.1167, 0.1219, 0.1157, 0.1427],
        [0.1335, 0.0969, 0.0930, 0.1241, 0.1313, 0.1744, 0.1452, 0.1016],
        [0.1302, 0.0705, 0.0763, 0.1342, 0.1440, 0.1215, 0.1432, 0.1803],
        [0.1176, 0.1115, 0.1219, 0.1513, 0.1349, 0.1207, 0.1083, 0.1338],
        [0.1263, 0.1069, 0.1073, 0.1369, 0.1367, 0.1421, 0.1303, 0.1135],
        [0.1317, 0.0690, 0.0797, 0.1380, 0.1508, 0.1458, 0.1426, 0.1423],
        [0.1440, 0.0579, 0.0743, 0.1174, 0.1298, 0.1446, 0.1911, 0.1408],
        [0.1029, 0.1199, 0.1162, 0.1565, 0.1297, 0.1371, 0.1375, 0.1002],
        [0.1121, 0.1049, 0.1033, 0.1467, 0.1620, 0.1264, 0.1212, 0.1235],
        [0.1245, 0.0732, 0.0821, 0.1546, 0.1497, 0.1370, 0.1425, 0.1364],
        [0.1456, 0.0618, 0.0751, 0.1302, 0.1318, 0.1605, 0.1630, 0.1319],
        [0.1611, 0.0651, 0.0742, 0.1198, 0.1605, 0.1600, 0.1110, 0.1483]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
